# Read the raw data in.
dat <- read.csv("nfl_pass_rush_receive_raw_data.csv")

# Reformat the dates: parse game_date from m/d/Y text into Date so it can
# be compared against the season boundaries below.
dates <- dat %>%
  mutate(game_date = as.Date(game_date, format = "%m/%d/%Y"))

# Classify the seasons: label each row with its season in `szn` (character).
# Lower bounds are inclusive and upper bounds are exclusive; a game_date
# that falls outside every window gets NA.
#
# The 2020 window ends at 2021-02-08 (exclusive) so that games played on
# 2021-02-07 (the date of Super Bowl LV) stay in the 2020 season. The
# earlier bound `< "2021-02-07"` dropped that day, unlike the 2021 window,
# whose bound of 2022-02-14 keeps 2022-02-13.
seasons <- dates %>%
  mutate(
    szn = case_when(
      game_date >= as.Date("2019-09-05") &
        game_date < as.Date("2020-02-13") ~ "2019",
      game_date >= as.Date("2020-09-10") &
        game_date < as.Date("2021-02-08") ~ "2020",
      game_date >= as.Date("2021-09-09") &
        game_date < as.Date("2022-02-14") ~ "2021"
    )
  )
# The code above took me about 2 hours to figure out and
# I'm so excited!! I did that so we could not only compare
# the teams (and their players) with the most receptions,
# but also make comparisons across the three most recent
# seasons (2019, 2020 and 2021).
# Keep only the rows with more than 12 receptions, then stack each
# player's receptions by team, one panel per season.
recnew <- seasons %>%
  filter(rec > 12)
ggplot(data = recnew, mapping = aes(x = team, y = rec, fill = player)) +
  geom_bar(stat = "identity") +
  facet_wrap(~szn) +
  # Dodge the team labels onto two rows so they do not overlap.
  scale_x_discrete(name = "Team", guide = guide_axis(n.dodge = 2)) +
  scale_y_continuous(name = "Number of Receptions") +
  scale_fill_discrete(name = "Player") +
  labs(
    title = "Los Angeles Chargers Lead in Receptions during 2019 and 2020 seasons",
    subtitle = "What happened to Allen and Ekeler during the 2021-2022 season?"
  )

# Okay this is a really great one! I think this tells us so
# much about how each of the Top 10 QBs in the 2021 season
# performs on each of these variables. I think I will add a
# few more of the variables so we can make more
# comparisons.
# Per-player means of the passing columns for rows with pos "QB" in the
# 2021 season. Column order matters: the plot below picks columns 2:9 by
# position (player is column 1).
qbdata <- seasons %>%
  filter(pos == "QB" & szn == "2021") %>%
  group_by(player) %>%
  summarise(
    across(
      c(
        pass_cmp, pass_att, pass_yds, pass_td,
        targets, pass_int, pass_sacked, pass_rating
      ),
      mean
    )
  )

# Restrict to the ten quarterbacks being compared.
# NOTE(review): the object is called NFC, but the list is just these ten
# names -- confirm whether a conference filter was intended.
NFC <- qbdata %>%
  filter(
    player %in% c(
      "Josh Allen", "Patrick Mahomes", "Justin Herbert", "Lamar Jackson",
      "Kyler Murray", "Joe Burrow", "Jalen Hurts", "Tom Brady",
      "Dak Prescott", "Russell Wilson"
    )
  )

# Parallel-coordinates plot with one line per quarterback.
ggparcoord(data = NFC, columns = 2:9, groupColumn = "player") +
  labs(
    title = "Eagles QB Jalen Hurts Leads in Number of Targets",
    subtitle = "But comes in last for number of touchdowns"
  )

# Write the most recent plot to disk.
ggsave("qbs.png")
# Cooper Kupp's rows only; stacked receiving yards per season, filled by
# playing surface.
kupp <- seasons %>%
  filter(player == "Cooper Kupp")
ggplot(data = kupp, mapping = aes(x = szn, y = rec_yds, fill = Surface)) +
  geom_bar(stat = "identity")

## This is pretty funny - it allows us to see the # of
## receiving yards Kupp had separated out by the surface
## type. He apparently is a fan of the matrix turf (new
## stadium was built for the 2021 season with the new turf,
## so that makes sense). I want to see how Kupp performs
## when he's home versus away
# Tag each of Kupp's games as "Home" or "Away" by matching his team against
# the home and visiting team columns (NA when it matches neither).
home <- mutate(
  kupp,
  location = case_when(
    team == home_team ~ "Home",
    team == vis_team ~ "Away"
  )
)
# Stacked receiving yards by location, filled by the Vegas favorite.
ggplot(
  data = home,
  mapping = aes(x = location, y = rec_yds, fill = Vegas_Favorite)
) +
  geom_bar(stat = "identity")

# More yards away than home...I added the Vegas Favorite
# variable afterward just for fun - the Rams were clearly
# predicted to win most games!
# Boxplot of receiving yards per season, split by home/away. A boxplot is
# used instead of geom_violin because the violin did not really show the
# outliers and the variation that takes place.
ggplot(data = home, mapping = aes(x = szn, y = rec_yds, fill = location)) +
  geom_boxplot()

## There are some outliers here, let's look at those and
## see what we can figure out.
# Pull the rows behind the boxplot outliers: 2020 home games with more
# than 50 receiving yards, or 2021 away games with more than 175.
outliers <- home %>%
  filter(
    (szn == "2020" & location == "Home" & rec_yds > 50) |
      (szn == "2021" & location == "Away" & rec_yds > 175)
  )
## `outliers` now shows which opponents Kupp gained those
## yards against, i.e. the outlier points in the boxplot
# Presentation version of the home/away boxplot, with axis and legend
# names and a title.
ggplot(data = home, mapping = aes(x = szn, y = rec_yds, fill = location)) +
  geom_boxplot() +
  theme_bw() +
  scale_x_discrete(name = "Season") +
  scale_y_continuous(name = "Receiving Yards") +
  scale_fill_discrete(name = "Location") +
  labs(
    title = "Kupp's Performance for Home and Away Games",
    subtitle = "Equal During Record 2021 Season"
  )

# New dataset only comparing Rams and Chiefs: rows where LAR or KAN is the
# home team, in the 2019-2021 seasons, with positive pass_target_yds.
# Columns are renamed for use as plot labels.
matchup <- seasons %>%
  filter(
    team %in% c("LAR", "KAN"),
    team == home_team,
    szn %in% c("2019", "2020", "2021"),
    pass_target_yds > 0
  ) %>%
  rename(
    Quarterback = player,
    Home_Score = home_score,
    Opponent_Score = vis_score,
    QB_Passing_Yards = pass_target_yds
  )

# Same rows narrowed to the four named quarterbacks, with szn exposed as
# Season. Built from `matchup` rather than repeating the whole pipeline, so
# the two data sets cannot drift apart when a filter changes.
matchup2 <- matchup %>%
  rename(Season = szn) %>%
  filter(
    Quarterback %in% c(
      "Jared Goff", "Matthew Stafford", "Patrick Mahomes", "Matt Moore"
    )
  )
# Stacked passing yards per team, filled by quarterback. Season is mapped
# to `label`, presumably so it appears in the interactive tooltip -- confirm.
preplot <- ggplot(
  data = matchup2,
  mapping = aes(
    x = team,
    y = QB_Passing_Yards,
    fill = Quarterback,
    label = Season
  )
) +
  geom_bar(stat = "identity") +
  scale_x_discrete(name = "Team") +
  scale_y_continuous(name = "Total QB Passing Yards") +
  labs(title = "Chiefs (KAN) and Rams (LAR) Quarterback Performance") +
  theme_classic()
# Convert the static plot to an interactive plotly widget.
ggplotly(preplot)
# Home score vs. opponent score for each home game, one panel per season.
# Point size is mapped to QB_Passing_Yards and colour to team; the legend
# is suppressed on the point layer.
plot <- ggplot(
  data = matchup,
  mapping = aes(
    x = Home_Score,
    y = Opponent_Score,
    size = QB_Passing_Yards,
    colour = team,
    label = Quarterback
  )
) +
  geom_point(alpha = 0.9, show.legend = FALSE) +
  facet_wrap(~szn) +
  theme_light() +
  labs(title = "Comparing Chiefs (KAN) and Rams (LAR) Home Game Matchups") +
  scale_color_hue() +
  scale_x_continuous(name = "Chiefs/Rams Home Score") +
  scale_y_continuous(name = "Opponent Score")
# Convert the static plot to an interactive plotly widget.
ggplotly(plot)