This is how I went about doing my assignment…
2nd paragraph… paragraph
# Directory used as the working directory for the rest of the script, so the
# relative file name passed to fread() below is resolved against it.
# NOTE(review): this is a hard-coded, machine-specific drive path; anyone
# running the script elsewhere has to edit it. Consider a project-relative
# path instead of calling setwd().
my_path <- "U:/"
setwd(my_path)
library(data.table)
library(ggplot2)
library(dplyr)
library(tidytext)
library(RColorBrewer)
# Load the game-level results and keep only the columns needed to decide who
# won each game.
df <- fread("GamesGitHub.csv")
cols_to_use <- c(
  "season", "away_team", "away_score", "home_team", "home_score"
)
df <- subset(df, select = cols_to_use)

# Label each game's winner and loser from the final score. A tied game gets a
# provisional label here (home team as "winner"); those rows are removed below
# once the tie count has been reported.
df$winner <- ifelse(df$away_score > df$home_score, df$away_team, df$home_team)
df$loser <- ifelse(df$away_score > df$home_score, df$home_team, df$away_team)

# Drop rows with any missing value (a missing score also yields an NA
# winner/loser from the ifelse() calls above).
df <- na.omit(df)

# Absolute scoring margin; a margin of 0 marks a tied game.
df$score_diff <- abs(df$away_score - df$home_score)
table(df$score_diff == 0)
##
## FALSE TRUE
## 6188 13

# A tie has neither a winner nor a loser. Without this filter every tied game
# would be counted as a home-team win and an away-team loss in the tallies
# built from `winner` / `loser` further down.
df <- df[df$score_diff != 0, ]
# Wins per team per season: one row for each (season, team) pair that appears
# in the `winner` column, with the number of such games.
winners <- df %>%
  group_by(season, team = winner) %>%
  summarise(win_count = n(), .groups = "drop") %>%
  data.frame()

# Losses per team per season, tallied the same way from the `loser` column.
losers <- df %>%
  group_by(season, team = loser) %>%
  summarise(lose_count = n(), .groups = "drop") %>%
  data.frame()

# Quick look at the first ten season/team loss totals.
head(losers, 10)
## season team lose_count
## 1 1999 ARI 10
## 2 1999 ATL 11
## 3 1999 BAL 8
## 4 1999 BUF 6
## 5 1999 CAR 8
## 6 1999 CHI 10
## 7 1999 CIN 12
## 8 1999 CLE 14
## 9 1999 DAL 9
## 10 1999 DEN 10
# Full outer join of the win and loss tallies on (season, team), so a pair
# that is present on only one side is still kept.
df1 <- merge(winners, losers, by = c("season", "team"), all = TRUE)

# A pair missing from one side comes back as NA after the join; treat it as 0.
df1[is.na(df1)] <- 0

# Totals per team across all seasons. The codes OAK, SD and STL are mapped
# onto LV, LAC and LA first so their rows are summed under a single code.
df2 <- df1 %>%
  mutate(team = recode(team, "OAK" = "LV", "SD" = "LAC", "STL" = "LA")) %>%
  group_by(team) %>%
  summarise(
    wins = sum(win_count),
    losses = sum(lose_count),
    .groups = "drop"
  ) %>%
  mutate(
    total_games = wins + losses,
    win_percentage = round(100 * wins / total_games),
    loss_percentage = round(100 * losses / total_games)
  ) %>%
  data.frame()
# Bar chart of total wins per team, tallest bar first. Bar colour and the
# text label above each bar both show the team's win percentage.
# NOTE(review): the fill limits start at 31 while the breaks start at 30, so
# the 30% tick lies outside the scale, and any win_percentage outside
# [31, 80] would be drawn in the NA colour — confirm this is intended.
ggplot(df2, aes(x = reorder(team, -wins), y = wins, fill = win_percentage)) +
  geom_col() +
  geom_text(aes(label = paste0(win_percentage, "%")), vjust = -0.5) +
  scale_fill_gradient(
    low = "red",
    high = "dark green",
    limits = c(31, 80),
    breaks = seq(30, 80, 10),
    labels = paste0(seq(30, 80, 10), "%")
  ) +
  labs(
    title = "Wins by Team",
    x = "team",
    y = "win count",
    fill = "Win Percentage"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
More paragraph text…
# Per-season table for 2019-2021: win/loss percentages, a facet-aware ordering
# of the teams, a plain team code for labelling, and a within-season rank.
df2 <- df1 %>%
  filter(season >= 2019 & season <= 2021) %>%
  mutate(team = recode(team, "OAK" = "LV", "SD" = "LAC", "STL" = "LA")) %>%
  group_by(season) %>%
  mutate(
    total = win_count + lose_count,
    win_percentage = round(100 * win_count / total, 0),
    loss_percentage = round(100 * lose_count / total, 0),
    # Keep the plain team code before `team` is overwritten below, instead of
    # recovering it afterwards by trimming a fixed number of characters off
    # the "<team>___<season>" label that reorder_within() produces.
    team_abbr = as.character(team),
    # Factor whose levels are ordered by win_count within each season.
    team = reorder_within(team, win_count, season),
    # Rank 1 = best win percentage in the season. Ties are broken by row
    # order ("first") so the ranks are the same on every run; "random"
    # reshuffled tied teams each time the script was executed.
    rank = rank(-win_percentage, ties.method = "first")
  ) %>%
  data.frame()
df2$team
## [1] ARI___2019 ATL___2019 BAL___2019 BUF___2019 CAR___2019 CHI___2019
## [7] CIN___2019 CLE___2019 DAL___2019 DEN___2019 DET___2019 GB___2019
## [13] HOU___2019 IND___2019 JAX___2019 KC___2019 LA___2019 LAC___2019
## [19] MIA___2019 MIN___2019 NE___2019 NO___2019 NYG___2019 NYJ___2019
## [25] LV___2019 PHI___2019 PIT___2019 SEA___2019 SF___2019 TB___2019
## [31] TEN___2019 WAS___2019 ARI___2020 ATL___2020 BAL___2020 BUF___2020
## [37] CAR___2020 CHI___2020 CIN___2020 CLE___2020 DAL___2020 DEN___2020
## [43] DET___2020 GB___2020 HOU___2020 IND___2020 JAX___2020 KC___2020
## [49] LA___2020 LAC___2020 LV___2020 MIA___2020 MIN___2020 NE___2020
## [55] NO___2020 NYG___2020 NYJ___2020 PHI___2020 PIT___2020 SEA___2020
## [61] SF___2020 TB___2020 TEN___2020 WAS___2020 ARI___2021 ATL___2021
## [67] BAL___2021 BUF___2021 CAR___2021 CHI___2021 CIN___2021 CLE___2021
## [73] DAL___2021 DEN___2021 DET___2021 GB___2021 HOU___2021 IND___2021
## [79] JAX___2021 KC___2021 LA___2021 LAC___2021 LV___2021 MIA___2021
## [85] MIN___2021 NE___2021 NO___2021 NYG___2021 NYJ___2021 PHI___2021
## [91] PIT___2021 SEA___2021 SF___2021 TB___2021 TEN___2021 WAS___2021
## 96 Levels: CIN___2019 DET___2019 WAS___2019 NYG___2019 ... LA___2021
# One bar chart per season, stacked vertically: wins per team, most wins
# first, with bar colour and bar label showing the win percentage.
# scale_x_reordered() strips the season suffix that reorder_within() added to
# the team labels.
ggplot(
  df2,
  aes(x = reorder(team, -win_count), y = win_count, fill = win_percentage)
) +
  geom_col() +
  geom_text(
    aes(label = paste0(win_percentage, "%")),
    vjust = -0.5,
    size = 3
  ) +
  facet_wrap(~season, nrow = 8, ncol = 1, scales = "free") +
  scale_x_reordered() +
  # 10% head-room above the tallest bar so its label is not clipped.
  scale_y_continuous(limits = c(0, max(df2$win_count) * 1.1)) +
  scale_fill_gradient(
    low = "red",
    high = "dark green",
    limits = c(1, 90),
    breaks = seq(0, 90, 10),
    labels = paste0(seq(0, 90, 10), "%")
  ) +
  labs(
    title = "Wins by Team",
    x = "team",
    y = "win count",
    fill = "Win Percentage"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# Stretch the 8-colour "Set2" palette to 32 interpolated colours, one per
# line in the bump chart.
cols <- colorRampPalette(brewer.pal(8, "Set2"))
my_Pal <- cols(32)

# Bump chart: each team's within-season rank across seasons, rank 1 at the top
# (reversed y axis). Team codes are printed to the left of the first season's
# points and to the right of the last season's points.
ggplot(df2, aes(x = season, y = rank, group = team_abbr)) +
  geom_line(aes(color = team_abbr), size = 2) +
  geom_point(shape = 21, size = 4, fill = "white") +
  scale_y_reverse(breaks = seq(max(df2$rank), 1, -1)) +
  # Left-hand labels: right-aligned, nudged just left of the first season.
  geom_text(
    data = df2 %>% filter(season == min(season)),
    aes(x = season - 0.05, y = rank, label = team_abbr),
    size = 3,
    hjust = 1
  ) +
  # Right-hand labels: left-aligned, nudged just right of the last season.
  # The offset must be positive here; with `season - 0.05` the text started
  # left of the point and was drawn on top of it.
  geom_text(
    data = df2 %>% filter(season == max(season)),
    aes(x = season + 0.05, y = rank, label = team_abbr),
    size = 3,
    hjust = 0
  ) +
  # Integer season ticks only.
  scale_x_continuous(
    breaks = min(df2$season):max(df2$season),
    labels = as.character(min(df2$season):max(df2$season))
  ) +
  scale_color_manual(values = my_Pal) +
  labs(
    title = "Bump Chart",
    x = "Season",
    y = "Rank",
    color = "Teams"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
More stuff goes here if you want it.