One of the latest crazes to hit social media is a data visualisation tool called the "Bar Chart race". This shows the progression of a ranked list as time progresses. Charts ranging from city populations to YouTube channel subscriptions have been displayed on various social media sites.

The purpose of this document is to make one of these bar chart races for the most recent Premier League season (2019-2020). The data are sourced from football-data.co.uk.

suppressMessages(x <- read_csv("E0.csv"))
x
## # A tibble: 380 x 106
##    Div   Date  Time  HomeTeam AwayTeam  FTHG  FTAG FTR    HTHG  HTAG HTR  
##    <chr> <chr> <tim> <chr>    <chr>    <dbl> <dbl> <chr> <dbl> <dbl> <chr>
##  1 E0    09/0~ 20:00 Liverpo~ Norwich      4     1 H         4     0 H    
##  2 E0    10/0~ 12:30 West Ham Man City     0     5 A         0     1 A    
##  3 E0    10/0~ 15:00 Bournem~ Sheffie~     1     1 D         0     0 D    
##  4 E0    10/0~ 15:00 Burnley  Southam~     3     0 H         0     0 D    
##  5 E0    10/0~ 15:00 Crystal~ Everton      0     0 D         0     0 D    
##  6 E0    10/0~ 15:00 Watford  Brighton     0     3 A         0     1 A    
##  7 E0    10/0~ 17:30 Tottenh~ Aston V~     3     1 H         0     1 A    
##  8 E0    11/0~ 14:00 Leicest~ Wolves       0     0 D         0     0 D    
##  9 E0    11/0~ 14:00 Newcast~ Arsenal      0     1 A         0     0 D    
## 10 E0    11/0~ 16:30 Man Uni~ Chelsea      4     0 H         1     0 H    
## # ... with 370 more rows, and 95 more variables: Referee <chr>, HS <dbl>,
## #   AS <dbl>, HST <dbl>, AST <dbl>, HF <dbl>, AF <dbl>, HC <dbl>, AC <dbl>,
## #   HY <dbl>, AY <dbl>, HR <dbl>, AR <dbl>, B365H <dbl>, B365D <dbl>,
## #   B365A <dbl>, BWH <dbl>, BWD <dbl>, BWA <dbl>, IWH <dbl>, IWD <dbl>,
## #   IWA <dbl>, PSH <dbl>, PSD <dbl>, PSA <dbl>, WHH <dbl>, WHD <dbl>,
## #   WHA <dbl>, VCH <dbl>, VCD <dbl>, VCA <dbl>, MaxH <dbl>, MaxD <dbl>,
## #   MaxA <dbl>, AvgH <dbl>, AvgD <dbl>, AvgA <dbl>, `B365>2.5` <dbl>,
## #   `B365<2.5` <dbl>, `P>2.5` <dbl>, `P<2.5` <dbl>, `Max>2.5` <dbl>,
## #   `Max<2.5` <dbl>, `Avg>2.5` <dbl>, `Avg<2.5` <dbl>, AHh <dbl>,
## #   B365AHH <dbl>, B365AHA <dbl>, PAHH <dbl>, PAHA <dbl>, MaxAHH <dbl>,
## #   MaxAHA <dbl>, AvgAHH <dbl>, AvgAHA <dbl>, B365CH <dbl>, B365CD <dbl>,
## #   B365CA <dbl>, BWCH <dbl>, BWCD <dbl>, BWCA <dbl>, IWCH <dbl>, IWCD <dbl>,
## #   IWCA <dbl>, PSCH <dbl>, PSCD <dbl>, PSCA <dbl>, WHCH <dbl>, WHCD <dbl>,
## #   WHCA <dbl>, VCCH <dbl>, VCCD <dbl>, VCCA <dbl>, MaxCH <dbl>, MaxCD <dbl>,
## #   MaxCA <dbl>, AvgCH <dbl>, AvgCD <dbl>, AvgCA <dbl>, `B365C>2.5` <dbl>,
## #   `B365C<2.5` <dbl>, `PC>2.5` <dbl>, `PC<2.5` <dbl>, `MaxC>2.5` <dbl>,
## #   `MaxC<2.5` <dbl>, `AvgC>2.5` <dbl>, `AvgC<2.5` <dbl>, AHCh <dbl>,
## #   B365CAHH <dbl>, B365CAHA <dbl>, PCAHH <dbl>, PCAHA <dbl>, MaxCAHH <dbl>,
## #   MaxCAHA <dbl>, AvgCAHH <dbl>, AvgCAHA <dbl>

Each row of the dataset represents a match in the Premier League season. Columns include the home and away teams, the goals scored by each team and a number of various other statistics from each match. The primary concern is for the team, goal and full time result columns. Using this information, we can create new columns showing how many points are awarded for each team after each match.

#Adding columns indicating the home points and away points awarded fo each game
x <- x %>% mutate(HomePoints = ifelse(FTR == "H", 3, ifelse(FTR == "D", 1, 0)), AwayPoints = ifelse(FTR == "H", 0, ifelse(FTR == "D", 1, 3)))

#League table for home results only
suppressMessages(HomeTable <- x %>% group_by(HomeTeam) %>% summarise(HomeGames = n(), HomeWins = sum(FTR == "H"), HomeDraws = sum(FTR == "D"), HomeLosses = sum(FTR == "A"), HomeGoals = sum(FTHG), HomeConcede = sum(FTAG), HomeGD = HomeGoals - HomeConcede, HomePoints = sum(HomePoints)) %>% rename("Team" = "HomeTeam"))

#League table for away results only
suppressMessages(AwayTable <- x %>% group_by(AwayTeam) %>% summarise(AwayGames = n(), AwayWins = sum(FTR == "A"), AwayDraws = sum(FTR == "D"), AwayLosses = sum(FTR == "H"), AwayGoals = sum(FTAG), AwayConcede = sum(FTHG), AwayGD = AwayGoals - AwayConcede, AwayPoints = sum(AwayPoints)) %>% rename("Team" = "AwayTeam"))

#Merging the home and away tables
TotalTable <- merge(HomeTable, AwayTable, by = "Team") %>% mutate(Played = HomeGames + AwayGames, Wins = HomeWins + AwayWins, Draws = HomeDraws + AwayDraws, Losses = HomeLosses + AwayLosses, GF = HomeGoals + AwayGoals, GA = HomeConcede + AwayConcede, GD = HomeGD + AwayGD, Points = HomePoints + AwayPoints)

#Keeping only the total standings
LeagueTable <- TotalTable[,c(1,18:25)]
LeagueTable <- LeagueTable[order(-LeagueTable$Points, -LeagueTable$GD),]
LeagueTable
##                Team Played Wins Draws Losses  GF GA  GD Points
## 10        Liverpool     38   32     3      3  85 33  52     99
## 11         Man City     38   26     3      9 102 35  67     81
## 12       Man United     38   18    12      8  66 36  30     66
## 6           Chelsea     38   20     6     12  69 54  15     66
## 9         Leicester     38   18     8     12  67 41  26     62
## 17        Tottenham     38   16    11     11  61 47  14     59
## 20           Wolves     38   15    14      9  51 40  11     59
## 1           Arsenal     38   14    14     10  56 48   8     56
## 15 Sheffield United     38   14    12     12  39 39   0     54
## 5           Burnley     38   15     9     14  43 50  -7     54
## 16      Southampton     38   15     7     16  51 60  -9     52
## 8           Everton     38   13    10     15  44 56 -12     49
## 13        Newcastle     38   11    11     16  38 58 -20     44
## 7    Crystal Palace     38   11    10     17  31 50 -19     43
## 4          Brighton     38    9    14     15  39 54 -15     41
## 19         West Ham     38   10     9     19  49 62 -13     39
## 2       Aston Villa     38    9     8     21  41 67 -26     35
## 3       Bournemouth     38    9     7     22  40 65 -25     34
## 18          Watford     38    8    10     20  36 64 -28     34
## 14          Norwich     38    5     6     27  26 75 -49     21

The final standings show Liverpool as champions by a significant margin. Man City, Man United and Chelsea finished in the top 4, so qualify fro the Champions League next season. Norwich, Watford and Bournemouth finished in the bottom 3 so will be playing in the Championship next season.

#Create an empty list for storing a team's ranking every matchday
listofranks <- list()

#Creating a for loop to store the league table in the empty list after each match day
suppressMessages(for(i in 1:floor(nrow(x)/10)){z <- x %>% head(n = i*10) %>% mutate(HomePoints = ifelse(FTR == "H", 3, ifelse(FTR == "D", 1, 0)), AwayPoints = ifelse(FTR == "H", 0, ifelse(FTR == "D", 1, 3)))
HomeTabley <- z %>% group_by(HomeTeam) %>% summarise(HomeGames = n(), HomeWins = sum(FTR == "H"), HomeDraws = sum(FTR == "D"), HomeLosses = sum(FTR == "A"), HomeGoals = sum(FTHG), HomeConcede = sum(FTAG), HomeGD = HomeGoals - HomeConcede, HomePoints = sum(HomePoints)) %>% rename("Team" = "HomeTeam")
AwayTabley <- z %>% group_by(AwayTeam) %>% summarise(AwayGames = n(), AwayWins = sum(FTR == "A"), AwayDraws = sum(FTR == "D"), AwayLosses = sum(FTR == "H"), AwayGoals = sum(FTAG), AwayConcede = sum(FTHG), AwayGD = AwayGoals - AwayConcede, AwayPoints = sum(AwayPoints)) %>% rename("Team" = "AwayTeam")

#The merge includes 'all = TRUE' so that the full table can be formed after the first match day, where only 10 teams will be in the home table and the remaining teams will be in the away table
TotalTabley <- merge(HomeTabley, AwayTabley, by = "Team", all = TRUE) 

#The NAs created as a result of the previous merge are changed to 0s
TotalTabley[is.na(TotalTabley)] <- 0

TotalTabley <- TotalTabley %>% mutate(Played = HomeGames + AwayGames, Wins = HomeWins + AwayWins, Draws = HomeDraws + AwayDraws, Losses = HomeLosses + AwayLosses, GF = HomeGoals + AwayGoals, GA = HomeConcede + AwayConcede, GD = HomeGD + AwayGD, Points = HomePoints + AwayPoints)
LeagueTabley <- TotalTabley[,c(1,18:25)]
LeagueTabley <- LeagueTabley[order(-LeagueTabley$Points, -LeagueTabley$GD, -LeagueTabley$GF),] %>% mutate(Rank = 1:20, Week = i)
listofranks[[i]] <- LeagueTabley[,c(1,8:11)]})

#Using rbindlist from the data.table package to combine the list of datasets from before
df <- rbindlist(listofranks)
head(df)
##          Team GD Points Rank Week
## 1:   Man City  5      3    1    1
## 2: Man United  4      3    2    1
## 3:  Liverpool  3      3    3    1
## 4:   Brighton  3      3    4    1
## 5:    Burnley  3      3    5    1
## 6:  Tottenham  2      3    6    1

We now have a dataframe that shows each team ranking at each game week. It is in the format that is needed to create the bar chart race.

#Assigning colours for each team
vals <- c("Arsenal" = "#FF0000", "Aston Villa" = "#660033", "Bournemouth" = "#330000", "Brighton" = "#0066CC", "Burnley" = "#800000", "Chelsea" = "#000066", "Crystal Palace" = "#0033FF", "Everton" = "#003366", "Leicester" = "#000099", "Liverpool" = "#990000", "Man City" = "#0099CC", "Man United" = "#CC0000", "Newcastle" = "#000000", "Norwich" = "#009900", "Sheffield United" = "#FF0033", "Southampton" = "#FF3333", "Tottenham" = "#CCCCCC", "Watford" = "#FFFF00", "West Ham" = "#660033", "Wolves" = "#FF9900")

#Plotting all the different rankings for each gameweek
p <- ggplot(df, aes(Rank)) + 
      geom_tile(aes(y = Points / 2, height = Points, width = 0.9, fill = as.factor(Team)), alpha = 0.8, color = NA) + 
      coord_flip(clip = "off", expand = FALSE) + 
      geom_text(aes(y = 0, colour = as.factor(Team),label = paste(Team, " ")), size = 6, vjust = 0.2, hjust = 1) + 
      geom_text(aes(y = Points, label = paste(" ", Points), hjust = 0)) +
      scale_x_reverse() + 
      scale_fill_manual(values = vals) +
      scale_colour_manual(values = vals) +
      labs(title='Premier League 2019-2020',subtitle='Game Week: {round(frame_time,0)}') +
      transition_time(Week) + ease_aes("cubic-in-out") + theme(axis.line=element_blank(),                                                       axis.text.x=element_blank(),                                                     axis.text.y=element_blank(),                                                     axis.ticks=element_blank(),                                                      axis.title.x=element_blank(),                                                    axis.title.y=element_blank(),
      legend.position="none",
      panel.background=element_blank(),
      panel.border=element_blank(),
      panel.grid.major=element_blank(),
      panel.grid.minor=element_blank(),
      panel.grid.major.x = element_line( size=.1, color="grey" ),
      panel.grid.minor.x = element_line( size=.1, color="grey" ),
      plot.title=element_text(size=25, hjust=0.5, face="bold",  colour="grey",vjust=-1),
      plot.subtitle=element_text(size=16, hjust=0.5, face="italic",color="grey"),
      plot.caption =element_text(size=8, hjust=0.5, face="italic", color="grey"),
      plot.background=element_blank(),
      plot.margin = margin(2,2, 2, 6, "cm"))

#Changing animation parameters to make a clearer race
animate(p, nframes = 750, fps = 25, end_pause = 50, width = 800, height = 500)