This is how I went about doing assignment #2
bold
my_path <- "U:/"
setwd(my_path)
library(ggplot2)
library(httr)
library(lubridate)
library(dplyr)
library(data.table)
df <- fread("gamesgithub.csv")
cols_to_use <- c("season", "away_team", "away_score","home_team", "home_score")
df <- subset(df, select = cols_to_use)
df$winner <- ifelse(df$away_score > df$ home_score, df$away_team, df$home_team)
df$loser <- ifelse(df$away_score > df$ home_score, df$home_team, df$away_team)
df <- na.omit(df)
View(df)
df$score_diff <- abs(df$away_score - df$home_score)
winners <- df %>%
mutate(team = winner)%>%
group_by(season, team) %>%
summarise(win_count = n(), .groups = "keep") %>%
data.frame()
head(winners,10)
## season team win_count
## 1 1999 ARI 6
## 2 1999 ATL 5
## 3 1999 BAL 8
## 4 1999 BUF 11
## 5 1999 CAR 8
## 6 1999 CHI 6
## 7 1999 CIN 4
## 8 1999 CLE 2
## 9 1999 DAL 8
## 10 1999 DEN 6
losers <- df %>%
mutate(team = loser) %>%
group_by(season, team) %>%
summarise(lose_count = n(), .groups = "keep") %>%
data.frame()
df1 <- merge(x= winners, y = losers, by=c("season", "team"), all = TRUE)
df1[is.na(df1)] <- 0
table(df1$season)
##
## 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
## 31 31 31 32 32 32 32 32 32 32 32 32 32 32 32 32
## 2015 2016 2017 2018 2019 2020 2021 2022
## 32 32 32 32 32 32 32 32
table(df1$season, df1$team)
##
## ARI ATL BAL BUF CAR CHI CIN CLE DAL DEN DET GB HOU IND JAX KC LA LAC LV
## 1999 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0
## 2000 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0
## 2001 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0
## 2002 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2003 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2004 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2005 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2006 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2007 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2008 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2009 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2010 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2011 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2012 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2013 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2014 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2015 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0
## 2016 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
## 2017 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## 2018 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## 2019 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## 2020 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2021 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2022 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##
## MIA MIN NE NO NYG NYJ OAK PHI PIT SD SEA SF STL TB TEN WAS
## 1999 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2000 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2001 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2002 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2003 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2004 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2005 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2006 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2007 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2008 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2009 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2010 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2011 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2012 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2013 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2014 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2015 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2016 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
## 2017 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1
## 2018 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1
## 2019 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1
## 2020 1 1 1 1 1 1 0 1 1 0 1 1 0 1 1 1
## 2021 1 1 1 1 1 1 0 1 1 0 1 1 0 1 1 1
## 2022 1 1 1 1 1 1 0 1 1 0 1 1 0 1 1 1
df2 <- df1 %>%
mutate(team=recode(team, "OAK" = "LV", "SD" = "LAC", "STL" = "LA"))%>%
group_by(team)%>%
summarise(wins = sum(win_count), losses = sum(lose_count), .groups = 'keep') %>%
mutate(total_games = wins + losses,
win_pc = 100*wins/total_games,
loss_pc = 100*losses,total_games)%>%
data.frame()
df2
## team wins losses total_games win_pc loss_pc
## 1 ARI 170 212 382 44.50262 21200
## 2 ATL 187 199 386 48.44560 19900
## 3 BAL 235 164 399 58.89724 16400
## 4 BUF 177 203 380 46.57895 20300
## 5 CAR 184 203 387 47.54522 20300
## 6 CHI 182 199 381 47.76903 19900
## 7 CIN 170 213 383 44.38642 21300
## 8 CLE 124 251 375 33.06667 25100
## 9 DAL 199 185 384 51.82292 18500
## 10 DEN 209 178 387 54.00517 17800
## 11 DET 131 245 376 34.84043 24500
## 12 GB 249 152 401 62.09476 15200
## 13 HOU 144 190 334 43.11377 19000
## 14 IND 244 156 400 61.00000 15600
## 15 JAX 151 229 380 39.73684 22900
## 16 KC 212 179 391 54.21995 17900
## 17 LA 194 198 392 49.48980 19800
## 18 LAC 192 192 384 50.00000 19200
## 19 LV 155 226 381 40.68241 22600
## 20 MIA 182 197 379 48.02111 19700
## 21 MIN 198 189 387 51.16279 18900
## 22 NE 293 121 414 70.77295 12100
## 23 NO 218 173 391 55.75448 17300
## 24 NYG 187 201 388 48.19588 20100
## 25 NYJ 169 215 384 44.01042 21500
## 26 PHI 226 173 399 56.64160 17300
## 27 PIT 250 149 399 62.65664 14900
## 28 SEA 225 175 400 56.25000 17500
## 29 SF 180 209 389 46.27249 20900
## 30 TB 184 203 387 47.54522 20300
## 31 TEN 205 185 390 52.56410 18500
## 32 WAS 159 221 380 41.84211 22100
ggplot(df2, aes(x=team, y = wins, fill = win_pc))+
geom_bar(stat="identity")
More Text