library(tidyverse)
library(openintro)
# Load the Super Bowl ads dataset from the CSV hosted on GitHub.
superbowl_ads <- read.csv(
  file = "https://raw.githubusercontent.com/carolc57/Data607/main/superbowl-ads.csv"
)
# Preview the first six rows to confirm the import worked.
head(superbowl_ads, n = 6L)
## year brand
## 1 2018 Toyota
## 2 2020 Bud Light
## 3 2006 Bud Light
## 4 2018 Hynudai
## 5 2003 Bud Light
## 6 2020 Toyota
## superbowl_ads_dot_com_url
## 1 https://superbowl-ads.com/good-odds-toyota/
## 2 https://superbowl-ads.com/2020-bud-light-seltzer-inside-posts-brain/
## 3 https://superbowl-ads.com/2006-bud-light-bear-attack/
## 4 https://superbowl-ads.com/hope-detector-nfl-super-bowl-lii-hyundai/
## 5 https://superbowl-ads.com/2003-bud-light-hermit-crab/
## 6 https://superbowl-ads.com/2020-toyota-go-places-with-cobie-smulders/
## youtube_url funny show_product_quickly
## 1 https://www.youtube.com/watch?v=zeBZvwYQ-hA False False
## 2 https://www.youtube.com/watch?v=nbbp0VW7z8w True True
## 3 https://www.youtube.com/watch?v=yk0MQD5YgV8 True False
## 4 https://www.youtube.com/watch?v=lNPccrGk77A False True
## 5 https://www.youtube.com/watch?v=ovQYgnXHooY True True
## 6 https://www.youtube.com/watch?v=f34Ji70u3nk True True
## patriotic celebrity danger animals use_sex
## 1 False False False False False
## 2 False True True False False
## 3 False False True True False
## 4 False False False False False
## 5 False False True True True
## 6 False True True True False
# Create a new, separately named data frame holding a subset of rows and
# columns: keep only ads aired after 2010 (the 2011-2020 period) and drop the
# two URL columns. The columns are dropped by name rather than by position
# (3, 4) so that a change in the CSV column order cannot silently remove the
# wrong columns.
superbowl_ads_post_2010 <- subset(
  superbowl_ads,
  year > 2010,
  select = -c(superbowl_ads_dot_com_url, youtube_url)
)
# Preview the first rows of the reduced data frame.
head(superbowl_ads_post_2010)
## year brand funny show_product_quickly patriotic celebrity danger animals
## 1 2018 Toyota False False False False False False
## 2 2020 Bud Light True True False True True False
## 4 2018 Hynudai False True False False False False
## 6 2020 Toyota True True False True True True
## 7 2020 Coca-Cola True False False True False True
## 8 2020 Kia False False False True False False
## use_sex
## 1 False
## 2 False
## 4 False
## 6 False
## 7 False
## 8 False
# Working with character data: stringr supplies str_replace_all().
library(stringr)
# Work on a copy so the filtered data frame itself stays unchanged.
superbowl_ads_post_2010_c <- superbowl_ads_post_2010
# Recode the "True"/"False" strings of the funny column to "Yes"/"No".
# A named vector applies both replacements in a single call.
superbowl_ads_post_2010_c$funny <- str_replace_all(
  superbowl_ads_post_2010_c$funny,
  c("True" = "Yes", "False" = "No")
)
# Same recoding for the animals column.
superbowl_ads_post_2010_c$animals <- str_replace_all(
  superbowl_ads_post_2010_c$animals,
  c("True" = "Yes", "False" = "No")
)
# Preview the recoded data frame.
head(superbowl_ads_post_2010_c, n = 6L)
## year brand funny show_product_quickly patriotic celebrity danger animals
## 1 2018 Toyota No False False False False No
## 2 2020 Bud Light Yes True False True True No
## 4 2018 Hynudai No True False False False No
## 6 2020 Toyota Yes True False True True Yes
## 7 2020 Coca-Cola Yes False False True False Yes
## 8 2020 Kia No False False True False No
## use_sex
## 1 False
## 2 False
## 4 False
## 6 False
## 7 False
## 8 False
# Keep the ads flagged as both funny (recoded to "Yes") and dangerous (still
# the original "True" string). which() drops rows where the condition is NA,
# matching what subset() does.
test <- superbowl_ads_post_2010_c[
  which(
    superbowl_ads_post_2010_c$funny == "Yes" &
      superbowl_ads_post_2010_c$danger == "True"
  ),
  ,
  drop = FALSE
]
# Preview the first matching rows.
head(test, n = 6L)
## year brand funny show_product_quickly patriotic celebrity danger animals
## 2 2020 Bud Light Yes True False True True No
## 6 2020 Toyota Yes True False True True Yes
## 20 2020 Doritos Yes True False True True Yes
## 25 2011 Kia Yes True False False True No
## 28 2013 Hynudai Yes True False False True Yes
## 32 2016 Hynudai Yes False False False True Yes
## use_sex
## 2 False
## 6 False
## 20 False
## 25 True
## 28 False
## 32 False
# How many ads did car maker Hyundai run from 2011 to 2020?
# The dataset spells the brand "Hynudai", so the comparison must use that
# spelling; the reported message uses the correct brand name.
# na.rm = TRUE keeps the count defined even if a brand value is missing.
Hyundai_ads <- sum(superbowl_ads_post_2010_c$brand == "Hynudai", na.rm = TRUE)
# paste() already builds the final string; wrapping it in sprintf() would
# treat it as a format string and misread any "%" it contained.
# The data frame only covers 2011-2020, so the message reports that range.
paste("Hyundai had", Hyundai_ads, "Superbowl ads from 2011 to 2020")
## [1] "Hyundai had 17 Superbowl ads from 2011 to 2020"
# Tabulate the number of ads per brand with dplyr::count(); the resulting
# brand/n table is the summary intended for a graphical representation.
library(dplyr)
superbowl_adsdf <- superbowl_ads_post_2010_c %>%
  count(brand)
# Show the full table of counts.
print(superbowl_adsdf)
## brand n
## 1 Bud Light 16
## 2 Budweiser 16
## 3 Coca-Cola 14
## 4 Doritos 15
## 5 E-Trade 3
## 6 Hynudai 17
## 7 Kia 12
## 8 NFL 8
## 9 Pepsi 9
## 10 Toyota 9
# Show only the first six brands.
head(superbowl_adsdf, n = 6L)
## brand n
## 1 Bud Light 16
## 2 Budweiser 16
## 3 Coca-Cola 14
## 4 Doritos 15
## 5 E-Trade 3
## 6 Hynudai 17
#I wanted to create a scatterplot to graphically represent the data but couldn’t get it to work.