library(engsoccerdata)
library(rvest)
library(tidyverse)
library(data.table)
library(magrittr)
library(zoo)
#download and munge data as per https://github.com/hkhare42/recipe_testing/blob/master/adjusted_table.ipynb
data <- read.csv('http://www.football-data.co.uk/mmz4281/1819/E0.csv') %>%
#take only columns we care about
select(date = Date, home = HomeTeam, away = AwayTeam, home_goals = FTHG, away_goals = FTAG) %>%
#convert dates and filter to before last weekend
mutate(date = as.Date(date, "%d/%m/%y")) %>%
filter(date < as.Date("2018-11-24")) %>%
#melt data by team and bind back opponents
setDT() %>%
melt.data.table(measure.vars = c("home", "away"), variable.name = "venue", value.name = "team") %>%
.[, opponent := team[order(-as.numeric(venue), date)]] %>%
#calculate points per match
.[(venue == "home" & home_goals > away_goals) | (venue == "away" & home_goals < away_goals), points := 3] %>%
.[(venue == "home" & home_goals < away_goals) | (venue == "away" & home_goals > away_goals), points := 0] %>%
.[home_goals == away_goals, points := 1] %>%
#reselect columns
.[, c("team", "venue", "opponent", "points")]
#get the teams
teams <- data$team %>% unique() %>% sort()
#create a matrix of all team combinations
team_matrix <- matrix(data = 0,
nrow = length(teams)/2 * (length(teams)-1),
ncol = length(teams),
#name columbs as single teams and rows as combinations
dimnames = list(
c(lapply(seq(length(teams)/2 * (length(teams)-1)), function(n) {
paste0(combn(teams, 2)[,n], collapse = "-")
})),
c(teams)
))
#add in 1/ -1 for each team combinations
for(row in seq(nrow(team_matrix))) {
sub_teams <- str_split(rownames(team_matrix)[row], "-")[[1]]
team_matrix[row,unlist(lapply(sub_teams, grep, colnames(team_matrix)))] <- c(1, -1)
}
#get the points per game difference for any team pair combination
get_ppg_diff <- function(row) {
#the pair of teams
sub_teams <- str_split(rownames(team_matrix)[row], "-")[[1]]
#the matches they have in common
common_matches <- data[team %in% sub_teams] %>%
.[, opponent := replace(opponent, opponent %in% sub_teams, "vs.")] %>%
#shouldn't include single head to head matches but will do for consistency with published work
.[duplicated(paste(venue, opponent))|duplicated(paste(venue, opponent), fromLast = TRUE)|opponent == "vs."]
#.[duplicated(paste(venue, opponent))|duplicated(paste(venue, opponent), fromLast = TRUE)]
#calculate the points per game
if(nrow(common_matches) > 0) {
points <- common_matches %>% group_by(team) %>% summarise(sum(points))
ppg_diff <- -diff(points$`sum(points)`) / (nrow(common_matches)/2)
} else {
ppg_diff <- 0
}
return(ppg_diff)
}
#run the function
ppg_differences <- lapply(seq(nrow(team_matrix)), get_ppg_diff)
#least squares fit using the 1/-1 matrix and the points per game differences
lst_sq_fit <- lsfit(team_matrix, unlist(ppg_differences))
## Warning in lsfit(team_matrix, unlist(ppg_differences)): 'X' matrix was
## collinear
#take the coefficients (ignoring the intercept)
lst_sq_coefficients <- lst_sq_fit$coefficients %>%
.[2:length(.)]
lst_sq_coefficients
## Arsenal Bournemouth Brighton Burnley Cardiff
## 0.77539683 0.05681704 -0.23688179 -0.89296157 -1.00046992
## Chelsea Crystal Palace Everton Fulham Huddersfield
## 0.74511696 -0.68477235 0.16010025 -1.29455096 -0.87860693
## Leicester Liverpool Man City Man United Newcastle
## -0.03932957 1.10078112 1.32493943 0.38302632 -0.88852966
## Southampton Tottenham Watford West Ham Wolves
## -1.00163325 0.97050125 0.04180242 -0.34511069 0.00000000
#take the coefficients * the mean point per game for the last team (coefficient == 0) and multiply by matches played
points <- (lst_sq_coefficients + mean(data[team == last(names(lst_sq_coefficients))]$points)) * table(data$team)
#sort the new table
sort(points, decreasing = TRUE)
##
## Man City Liverpool Tottenham Arsenal Chelsea
## 31.8992732 29.2093734 27.6460150 25.3047619 24.9414035
## Man United Everton Bournemouth Watford Wolves
## 20.5963158 17.9212030 16.6818045 16.5016291 16.0000000
## Leicester Brighton West Ham Crystal Palace Huddersfield
## 15.5280451 13.1574185 11.8586717 7.7827318 5.4567168
## Newcastle Burnley Cardiff Southampton Fulham
## 5.3376441 5.2844612 3.9943609 3.9804010 0.4653885