R Markdown

This is how I went about doing assignment #2

bold

  • hi
  • you
      my_path <- "U:/"
      setwd(my_path)
      library(ggplot2)
      library(httr)
      library(lubridate)
      library(dplyr)
      library(data.table)
      
      
      df <- fread("gamesgithub.csv")
      
      cols_to_use <- c("season", "away_team", "away_score","home_team", "home_score")
      
      df <- subset(df, select = cols_to_use)
      
      
      df$winner <- ifelse(df$away_score > df$ home_score, df$away_team, df$home_team)
      df$loser <- ifelse(df$away_score > df$ home_score, df$home_team, df$away_team)
      
      df <- na.omit(df)
      
      View(df)
      
      
      df$score_diff <- abs(df$away_score - df$home_score)
      
      winners <- df %>%
        mutate(team = winner)%>%
        group_by(season, team) %>%
        summarise(win_count = n(), .groups = "keep") %>%
        data.frame()
      head(winners,10)
      ##    season team win_count
      ## 1    1999  ARI         6
      ## 2    1999  ATL         5
      ## 3    1999  BAL         8
      ## 4    1999  BUF        11
      ## 5    1999  CAR         8
      ## 6    1999  CHI         6
      ## 7    1999  CIN         4
      ## 8    1999  CLE         2
      ## 9    1999  DAL         8
      ## 10   1999  DEN         6
      losers <- df %>%
        mutate(team = loser) %>%
        group_by(season, team) %>%
        summarise(lose_count = n(), .groups = "keep") %>%
        data.frame()
      
      df1 <- merge(x= winners, y = losers, by=c("season", "team"), all = TRUE)
      
      df1[is.na(df1)] <- 0
      
      table(df1$season)
      ## 
      ## 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 
      ##   31   31   31   32   32   32   32   32   32   32   32   32   32   32   32   32 
      ## 2015 2016 2017 2018 2019 2020 2021 2022 
      ##   32   32   32   32   32   32   32   32
      table(df1$season, df1$team)
      ##       
      ##        ARI ATL BAL BUF CAR CHI CIN CLE DAL DEN DET GB HOU IND JAX KC LA LAC LV
      ##   1999   1   1   1   1   1   1   1   1   1   1   1  1   0   1   1  1  0   0  0
      ##   2000   1   1   1   1   1   1   1   1   1   1   1  1   0   1   1  1  0   0  0
      ##   2001   1   1   1   1   1   1   1   1   1   1   1  1   0   1   1  1  0   0  0
      ##   2002   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2003   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2004   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2005   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2006   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2007   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2008   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2009   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2010   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2011   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2012   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2013   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2014   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2015   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  0   0  0
      ##   2016   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  1   0  0
      ##   2017   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  1   1  0
      ##   2018   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  1   1  0
      ##   2019   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  1   1  0
      ##   2020   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  1   1  1
      ##   2021   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  1   1  1
      ##   2022   1   1   1   1   1   1   1   1   1   1   1  1   1   1   1  1  1   1  1
      ##       
      ##        MIA MIN NE NO NYG NYJ OAK PHI PIT SD SEA SF STL TB TEN WAS
      ##   1999   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2000   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2001   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2002   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2003   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2004   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2005   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2006   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2007   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2008   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2009   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2010   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2011   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2012   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2013   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2014   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2015   1   1  1  1   1   1   1   1   1  1   1  1   1  1   1   1
      ##   2016   1   1  1  1   1   1   1   1   1  1   1  1   0  1   1   1
      ##   2017   1   1  1  1   1   1   1   1   1  0   1  1   0  1   1   1
      ##   2018   1   1  1  1   1   1   1   1   1  0   1  1   0  1   1   1
      ##   2019   1   1  1  1   1   1   1   1   1  0   1  1   0  1   1   1
      ##   2020   1   1  1  1   1   1   0   1   1  0   1  1   0  1   1   1
      ##   2021   1   1  1  1   1   1   0   1   1  0   1  1   0  1   1   1
      ##   2022   1   1  1  1   1   1   0   1   1  0   1  1   0  1   1   1
      df2 <- df1 %>%
        mutate(team=recode(team, "OAK" = "LV", "SD" = "LAC", "STL" = "LA"))%>%
        group_by(team)%>%
        summarise(wins = sum(win_count), losses = sum(lose_count), .groups = 'keep') %>%
        mutate(total_games = wins + losses,
               win_pc = 100*wins/total_games,
               loss_pc = 100*losses,total_games)%>%
        data.frame()
      df2
      ##    team wins losses total_games   win_pc loss_pc
      ## 1   ARI  170    212         382 44.50262   21200
      ## 2   ATL  187    199         386 48.44560   19900
      ## 3   BAL  235    164         399 58.89724   16400
      ## 4   BUF  177    203         380 46.57895   20300
      ## 5   CAR  184    203         387 47.54522   20300
      ## 6   CHI  182    199         381 47.76903   19900
      ## 7   CIN  170    213         383 44.38642   21300
      ## 8   CLE  124    251         375 33.06667   25100
      ## 9   DAL  199    185         384 51.82292   18500
      ## 10  DEN  209    178         387 54.00517   17800
      ## 11  DET  131    245         376 34.84043   24500
      ## 12   GB  249    152         401 62.09476   15200
      ## 13  HOU  144    190         334 43.11377   19000
      ## 14  IND  244    156         400 61.00000   15600
      ## 15  JAX  151    229         380 39.73684   22900
      ## 16   KC  212    179         391 54.21995   17900
      ## 17   LA  194    198         392 49.48980   19800
      ## 18  LAC  192    192         384 50.00000   19200
      ## 19   LV  155    226         381 40.68241   22600
      ## 20  MIA  182    197         379 48.02111   19700
      ## 21  MIN  198    189         387 51.16279   18900
      ## 22   NE  293    121         414 70.77295   12100
      ## 23   NO  218    173         391 55.75448   17300
      ## 24  NYG  187    201         388 48.19588   20100
      ## 25  NYJ  169    215         384 44.01042   21500
      ## 26  PHI  226    173         399 56.64160   17300
      ## 27  PIT  250    149         399 62.65664   14900
      ## 28  SEA  225    175         400 56.25000   17500
      ## 29   SF  180    209         389 46.27249   20900
      ## 30   TB  184    203         387 47.54522   20300
      ## 31  TEN  205    185         390 52.56410   18500
      ## 32  WAS  159    221         380 41.84211   22100
      ggplot(df2, aes(x=team, y = wins, fill = win_pc))+
        geom_bar(stat="identity")

Diffferent Area to explain

More Text