# Information on the assignment/code
# Work from the project folder so that the relative file names used later in
# the script (the games CSV is read by bare file name) resolve correctly.
# NOTE(review): this is a machine-specific absolute path; the script will stop
# here on any computer where the folder does not exist.
my_path <- "C:/Users/nrhar/OneDrive/Documents/SportsAnalytics"
setwd(dir = my_path)
library(dplyr)
library(data.table)
library(lubridate)
library(httr)
library(ggplot2)
library(tidytext)
library(RColorBrewer)
# Load the game results and keep only the columns needed to decide each
# game's outcome.
df7 <- fread("games1.csv")
cols_to_use <- c("season", "away_team", "away_score", "home_team", "home_score")
df7 <- subset(df7, select = cols_to_use)

# Drop incomplete rows (presumably games without a recorded score) before any
# outcome is derived from the scores.
df7 <- na.omit(df7)

# A tied game has neither a winner nor a loser. Without this filter both
# ifelse() calls below fall through to `home_team`, crediting the home side
# with a win AND a loss while the away side gets nothing.
df7 <- df7[df7$away_score != df7$home_score, ]

# With ties removed, exactly one side has the strictly higher score.
df7$winner <- ifelse(df7$away_score > df7$home_score, df7$away_team, df7$home_team)
df7$loser <- ifelse(df7$away_score < df7$home_score, df7$away_team, df7$home_team)

# Absolute margin of victory.
df7$score_diff <- abs(df7$away_score - df7$home_score)
# Number of games won by each team in each season.
winners <- df7 %>%
  group_by(season, team = winner) %>%
  summarise(win_count = n(), .groups = "drop") %>%
  as.data.frame()

# Number of games lost by each team in each season.
losers <- df7 %>%
  group_by(season, team = loser) %>%
  summarise(loss_count = n(), .groups = "drop") %>%
  as.data.frame()

# Full outer join so a team/season that appears in only one of the two tables
# is still kept; the count missing from the other table becomes 0 instead of NA.
df8 <- merge(winners, losers, by = c("season", "team"), all = TRUE)
df8[is.na(df8)] <- 0
# Collapse the per-season table into one row per team across all seasons.
# Three older team codes are first mapped onto newer ones (presumably
# relocated franchises) so their records are summed under a single code.
df9 <- df8 %>%
  mutate(team = recode(team, 'OAK' = 'LV', 'SD' = 'LAC', 'STL' = 'LA')) %>%
  group_by(team) %>%
  summarise(
    wins = sum(win_count),
    losses = sum(loss_count),
    .groups = "drop"
  ) %>%
  mutate(
    total_games = wins + losses,
    # Percentages are rounded to whole numbers for display.
    win_pc = round(100 * wins / total_games),
    loss_pc = round(100 * losses / total_games)
  ) %>%
  as.data.frame()
# Bar chart of total wins per team, ordered from most to fewest wins.
# Bars are shaded red-to-green by win percentage and labelled with it.
ggplot(data = df9, mapping = aes(x = reorder(team, -wins), y = wins, fill = win_pc)) +
  geom_col() +
  geom_text(mapping = aes(label = paste0(win_pc, "%")), vjust = -0.5) +
  # The colour scale is fixed to 30-80 with a tick every 10 points; a win
  # percentage outside that range falls outside the scale limits.
  scale_fill_continuous(
    low = "red",
    high = "dark green",
    limits = c(30, 80),
    breaks = seq(30, 80, 10),
    labels = paste0(seq(30, 80, 10), "%")
  ) +
  labs(
    title = "Wins by Team",
    x = "Team",
    y = "Win Count",
    fill = "Win Percentage"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# More about code
# Per-season team records for 2019-2021, with win/loss percentages and a
# within-season rank by win percentage (rank 1 = highest).
df9 <- df8 %>%
  filter(season >= 2019 & season <= 2021) %>%
  mutate(team = recode(team, 'OAK' = 'LV', 'SD' = 'LAC', 'STL' = 'LA')) %>%
  group_by(season) %>%
  mutate(
    total = win_count + loss_count,
    win_pc = round(100 * win_count / total, 0),
    loss_pc = round(100 * loss_count / total, 0),
    # Keep the plain team code BEFORE `team` is replaced by the season-suffixed
    # factor below, instead of stripping a hard-coded number of characters
    # from the generated label afterwards.
    team_abbr = team,
    # Season-specific factor so each facet can order its own bars; paired
    # with scale_x_reordered() when plotting.
    team = reorder_within(team, win_count, season),
    # Ties are broken by row order rather than randomly, so the ranks (and
    # any chart built from them) are identical on every run while still
    # being unique within a season.
    rank = rank(-win_pc, ties.method = "first")
  ) %>%
  data.frame()
# One bar chart of wins per team for each season, stacked vertically.
# Bars are shaded by win percentage and labelled with it.
ggplot(
  data = df9,
  mapping = aes(x = reorder(team, -win_count), y = win_count, fill = win_pc)
) +
  geom_col() +
  geom_text(mapping = aes(label = paste0(win_pc, "%")), vjust = -0.5, size = 3) +
  # Strips the season suffix that reorder_within() added to the team labels.
  scale_x_reordered() +
  # 10% headroom above the tallest bar so the percentage labels are not clipped.
  scale_y_continuous(limits = c(0, 1.1 * max(df9$win_count))) +
  # NOTE(review): the limits start at 1 while the breaks start at 0, so the
  # "0%" break lies outside the scale limits - confirm this is intended.
  scale_fill_continuous(
    low = "red",
    high = "dark green",
    limits = c(1, 90),
    breaks = seq(0, 90, 10),
    labels = paste0(seq(0, 90, 10), "%")
  ) +
  facet_wrap(~season, ncol = 1, nrow = 3, scales = "free") +
  labs(
    title = "Wins by Team",
    x = "Team",
    y = "Win Count",
    fill = "Win Percentage"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# Stretch the 8-colour "Set2" palette into 32 interpolated colours
# (presumably one per team line).
cols <- colorRampPalette(brewer.pal(8, "Set2"))
myPal <- cols(32)

# Bump chart: one line per team tracing its rank across seasons, with rank 1
# at the top. Team codes are printed beside the first and last season.
ggplot(data = df9, mapping = aes(x = season, y = rank, group = team_abbr)) +
  geom_line(mapping = aes(color = team_abbr), size = 2) +
  geom_point(shape = 21, size = 4, fill = "white") +
  # Labels to the left of the earliest season.
  geom_text(
    data = filter(df9, season == min(season)),
    mapping = aes(x = season, y = rank, label = team_abbr),
    size = 3,
    hjust = 1.5
  ) +
  # Labels to the right of the latest season.
  geom_text(
    data = filter(df9, season == max(season)),
    mapping = aes(x = season, y = rank, label = team_abbr),
    size = 3,
    hjust = -0.5
  ) +
  # One tick per season, labelled with the plain year.
  scale_x_continuous(
    breaks = seq(min(df9$season), max(df9$season)),
    labels = as.character(seq(min(df9$season), max(df9$season)))
  ) +
  # Reversed axis with a tick for every rank down to 1.
  scale_y_reverse(breaks = seq(from = max(df9$rank), to = 1, by = -1)) +
  scale_color_manual(values = myPal) +
  labs(
    title = "Bump Chart for Teams by Win Percentage",
    x = "Season",
    y = "Ranks",
    colour = "Teams"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# More stuff goes here if desired