library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load necessary libraries
library(dplyr)
library(ggplot2)
Data Preparation
First, we load the dataset.
setwd("C:/Users/hp/Downloads/SUPERBOWL")
# Load the dataset
superbowl <- read.csv(file = "superbowl.csv",fileEncoding = 'UTF-8-BOM')
head(superbowl)
## id time new_brand week_of adspend month year volume pos neg mixed
## 1 1 1 Beetle 2-Jan-12 2.7 1 2012 2661 533 62 2066
## 2 1 2 Beetle 9-Jan-12 4.0 1 2012 3620 677 87 2856
## 3 1 3 Beetle 16-Jan-12 1.3 1 2012 4138 753 98 3287
## 4 1 4 Beetle 23-Jan-12 0.7 1 2012 3255 674 99 2482
## 5 1 5 Beetle 30-Jan-12 7105.0 1 2012 5144 1199 172 3773
## 6 1 6 Beetle 6-Feb-12 2576.2 2 2012 8021 2150 435 5436
## superbowl
## 1 0
## 2 0
## 3 0
## 4 0
## 5 1
## 6 1
# Convert 'week_of' column to Date format for better analysis
superbowl$week_of <- as.Date(superbowl$week_of, format = "%d-%b-%y")
# Separate data before and after the Super Bowl (Jan 30, 2012)
pre_superbowl <- superbowl |> filter(superbowl == 0)
post_superbowl <- superbowl |> filter(superbowl == 1)
# Calculate average ad spend and buzz metrics before and after Super Bowl for each brand
summary_table <- superbowl %>%
group_by(new_brand, superbowl) %>%
summarise(
avg_adspend = mean(adspend, na.rm = TRUE),
avg_volume = mean(volume, na.rm = TRUE),
avg_positive = mean(pos, na.rm = TRUE),
avg_negative = mean(neg, na.rm = TRUE),
avg_mixed = mean(mixed, na.rm = TRUE)
)
## `summarise()` has grouped output by 'new_brand'. You can override using the
## `.groups` argument.
print(summary_table)
## # A tibble: 6 × 7
## # Groups: new_brand [3]
## new_brand superbowl avg_adspend avg_volume avg_positive avg_negative avg_mixed
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Beetle 0 2.17 3418. 659. 86.5 2673.
## 2 Beetle 1 891. 4319. 965. 156. 3198.
## 3 CR-Z 0 1.25 1172. 268 60.5 843
## 4 CR-Z 1 0.709 1486. 339. 49.8 1097.
## 5 Camaro 0 71.1 87596. 21432 4458. 61705
## 6 Camaro 1 518. 93613. 23509. 4679. 65426.
# Compare total buzz (volume) before and after Super Bowl for each brand
buzz_comparison <- superbowl %>%
group_by(new_brand) %>%
summarise(
total_buzz_pre = sum(volume[superbowl == 0], na.rm = TRUE),
total_buzz_post = sum(volume[superbowl == 1], na.rm = TRUE)
)
print(buzz_comparison)
## # A tibble: 3 × 3
## new_brand total_buzz_pre total_buzz_post
## <chr> <int> <int>
## 1 Beetle 13674 47507
## 2 CR-Z 4686 16348
## 3 Camaro 350382 1029746
# Visualize trends in ad spend and buzz over time
library(ggplot2)
# Plot ad spend over time for each brand
ggplot(superbowl, aes(x = week_of, y = adspend, color = new_brand)) +
geom_line() +
geom_point() +
labs(title = "Ad Spend Over Time", x = "Week", y = "Ad Spend (in $1000s)") +
theme_minimal()
# Plot social media buzz (volume) over time for each brand
ggplot(superbowl, aes(x = week_of, y = volume, color = new_brand)) +
geom_line() +
geom_point() +
labs(title = "Social Media Buzz Over Time", x = "Week", y = "Buzz Volume") +
theme_minimal()
Interpretation of Results
Ad Spend Trends:
Social Media Buzz Trends:
Pre vs. Post Super Bowl Analysis:
Brand-Specific Insights: