StatsBomb is a football analytics provider. To encourage future analysts, they have committed to providing free data. Here, I explore that data.
# install.packages("devtools")
# install.packages("remotes")
# remotes::install_version("SDMTools", "1.1-221")
# devtools::install_github("statsbomb/StatsBombR")# install.packages("ggsoccer")
# install.packages("here")# if (!require("devtools")) install.packages("devtools")
# devtools::install_github("jogall/soccermatics")# library(devtools)
# library(remotes)
# library(SDMTools)
library(StatsBombR)## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: stringi
## Loading required package: stringr
## Loading required package: tibble
## Loading required package: rvest
## Loading required package: RCurl
## Loading required package: doParallel
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## Loading required package: httr
## Loading required package: jsonlite
## Loading required package: purrr
##
## Attaching package: 'purrr'
## The following object is masked from 'package:jsonlite':
##
## flatten
## The following objects are masked from 'package:foreach':
##
## accumulate, when
## Loading required package: sp
## Loading required package: tidyr
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
##
## complete
## Warning: replacing previous import 'foreach::when' by 'purrr::when' when loading
## 'StatsBombR'
## Warning: replacing previous import 'jsonlite::flatten' by 'purrr::flatten' when
## loading 'StatsBombR'
## Warning: replacing previous import 'foreach::accumulate' by 'purrr::accumulate'
## when loading 'StatsBombR'
library(ggrepel)## Loading required package: ggplot2
library("ggsoccer")
library("soccermatics")library(tidyverse)## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x purrr::accumulate() masks foreach::accumulate()
## x tidyr::complete() masks RCurl::complete()
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks jsonlite::flatten()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag() masks stats::lag()
## x purrr::when() masks foreach::when()
StatsBomb provides free data. We can retrieve the list of free competitions with FreeCompetitions().
data_freecomp <- FreeCompetitions()## [1] "Whilst we are keen to share data and facilitate research, we also urge you to be responsible with the data. Please register your details on https://www.statsbomb.com/resource-centre and read our User Agreement carefully."
glimpse(data_freecomp)## Rows: 40
## Columns: 12
## $ competition_id <int> 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, ~
## $ season_id <int> 4, 1, 2, 27, 26, 25, 24, 23, 22, 21, 41, 39,~
## $ country_name <chr> "Europe", "Europe", "Europe", "Europe", "Eur~
## $ competition_name <chr> "Champions League", "Champions League", "Cha~
## $ competition_gender <chr> "male", "male", "male", "male", "male", "mal~
## $ competition_youth <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA~
## $ competition_international <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA~
## $ season_name <chr> "2018/2019", "2017/2018", "2016/2017", "2015~
## $ match_updated <chr> "2021-08-27T11:26:39.802832", "2021-08-27T11~
## $ match_updated_360 <chr> "2021-06-13T16:17:31.694", "2021-06-13T16:17~
## $ match_available_360 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ match_available <chr> "2021-07-09T14:06:05.802", "2021-01-23T21:55~
head(data_freecomp, 5)## competition_id season_id country_name competition_name competition_gender
## 1 16 4 Europe Champions League male
## 2 16 1 Europe Champions League male
## 3 16 2 Europe Champions League male
## 4 16 27 Europe Champions League male
## 5 16 26 Europe Champions League male
## competition_youth competition_international season_name
## 1 FALSE FALSE 2018/2019
## 2 FALSE FALSE 2017/2018
## 3 FALSE FALSE 2016/2017
## 4 FALSE FALSE 2015/2016
## 5 FALSE FALSE 2014/2015
## match_updated match_updated_360 match_available_360
## 1 2021-08-27T11:26:39.802832 2021-06-13T16:17:31.694 <NA>
## 2 2021-08-27T11:26:39.802832 2021-06-13T16:17:31.694 <NA>
## 3 2021-08-27T11:26:39.802832 2021-06-13T16:17:31.694 <NA>
## 4 2021-08-27T11:26:39.802832 2021-06-13T16:17:31.694 <NA>
## 5 2021-08-27T11:26:39.802832 2021-06-13T16:17:31.694 <NA>
## match_available
## 1 2021-07-09T14:06:05.802
## 2 2021-01-23T21:55:30.425330
## 3 2020-07-29T05:00
## 4 2020-07-29T05:00
## 5 2020-07-29T05:00
tail(data_freecomp, 5)## competition_id season_id country_name competition_name
## 36 11 37 Spain La Liga
## 37 49 3 United States of America NWSL
## 38 2 44 England Premier League
## 39 55 43 Europe UEFA Euro
## 40 72 30 International Women's World Cup
## competition_gender competition_youth competition_international season_name
## 36 male FALSE FALSE 2004/2005
## 37 female FALSE FALSE 2018
## 38 male FALSE FALSE 2003/2004
## 39 male FALSE TRUE 2020
## 40 female FALSE TRUE 2019
## match_updated match_updated_360
## 36 2020-07-29T05:00 2021-06-13T16:17:31.694
## 37 2021-11-06T05:53:29.435016 2021-06-13T16:17:31.694
## 38 2021-11-14T22:29:00.646120 2021-06-13T16:17:31.694
## 39 2022-02-01T17:20:34.319496 2021-11-11T13:54:37.507376
## 40 2020-07-29T05:00 2021-06-13T16:17:31.694
## match_available_360 match_available
## 36 <NA> 2020-07-29T05:00
## 37 <NA> 2021-11-06T05:53:29.435016
## 38 <NA> 2021-11-14T22:29:00.646120
## 39 2021-11-11T13:54:37.507376 2022-02-01T17:20:34.319496
## 40 <NA> 2020-07-29T05:00
To see the available matches, we can call the FreeMatches() function on the object returned by FreeCompetitions().
data_freematch <- FreeMatches(data_freecomp)## [1] "Whilst we are keen to share data and facilitate research, we also urge you to be responsible with the data. Please register your details on https://www.statsbomb.com/resource-centre and read our User Agreement carefully."
glimpse(data_freematch)## Rows: 1,096
## Columns: 42
## $ match_id <int> 22912, 18245, 18244, 18243, 18242, 1824~
## $ match_date <chr> "2019-06-01", "2018-05-26", "2017-06-03~
## $ kick_off <chr> "21:00:00.000", "20:45:00.000", "20:45:~
## $ home_score <int> 0, 3, 1, 1, 1, 4, 1, 1, 3, 0, 2, 2, 3, ~
## $ away_score <int> 2, 1, 4, 1, 3, 1, 2, 1, 1, 2, 0, 1, 3, ~
## $ match_status <chr> "available", "available", "available", ~
## $ match_status_360 <chr> "scheduled", "scheduled", "scheduled", ~
## $ last_updated <chr> "2020-07-29T05:00", "2021-01-23T21:55:3~
## $ last_updated_360 <chr> "2021-06-13T16:17:31.694", "2021-06-13T~
## $ match_week <int> 13, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ competition.competition_id <int> 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,~
## $ competition.country_name <chr> "Europe", "Europe", "Europe", "Europe",~
## $ competition.competition_name <chr> "Champions League", "Champions League",~
## $ season.season_id <int> 4, 1, 2, 27, 26, 25, 24, 23, 22, 21, 41~
## $ season.season_name <chr> "2018/2019", "2017/2018", "2016/2017", ~
## $ home_team.home_team_id <int> 38, 220, 224, 220, 224, 220, 180, 169, ~
## $ home_team.home_team_name <chr> "Tottenham Hotspur", "Real Madrid", "Ju~
## $ home_team.home_team_gender <chr> "male", "male", "male", "male", "male",~
## $ home_team.home_team_group <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ home_team.managers <list> [<data.frame[1 x 6]>], [<data.frame[1 ~
## $ home_team.country.id <int> 68, 214, 112, 214, 112, 214, 85, 85, 21~
## $ home_team.country.name <chr> "England", "Spain", "Italy", "Spain", "~
## $ away_team.away_team_id <int> 24, 24, 220, 212, 217, 212, 169, 33, 39~
## $ away_team.away_team_name <chr> "Liverpool", "Liverpool", "Real Madrid"~
## $ away_team.away_team_gender <chr> "male", "male", "male", "male", "male",~
## $ away_team.away_team_group <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ away_team.managers <list> [<data.frame[1 x 6]>], [<data.frame[1 ~
## $ away_team.country.id <int> 68, 68, 214, 214, 214, 214, 85, 68, 68,~
## $ away_team.country.name <chr> "England", "England", "Spain", "Spain",~
## $ metadata.data_version <chr> "1.1.0", "1.1.0", "1.0.3", "1.0.3", "1.~
## $ metadata.shot_fidelity_version <chr> "2", "2", NA, NA, NA, NA, NA, NA, NA, N~
## $ metadata.xy_fidelity_version <chr> "2", "2", NA, NA, NA, NA, NA, NA, NA, N~
## $ competition_stage.id <int> 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,~
## $ competition_stage.name <chr> "Final", "Final", "Final", "Final", "Fi~
## $ stadium.id <int> 4654, 4222, 113891, 388, 367, 611, 4666~
## $ stadium.name <chr> "Estadio Wanda Metropolitano", "NSK Oli~
## $ stadium.country.id <int> 214, 238, 249, 112, 85, 183, 68, 85, 68~
## $ stadium.country.name <chr> "Spain", "Ukraine", "Wales", "Italy", "~
## $ referee.id <int> 420, 727, 186, 728, 581, 287, 419, 717,~
## $ referee.name <chr> "Damir Skomina", "Milorad Mažić", "Fe~
## $ referee.country.id <int> 208, 203, 85, 68, 233, 160, 112, 183, 1~
## $ referee.country.name <chr> "Slovenia", "Serbia", "Germany", "Engla~
We can filter the competition first before looking into the matches.
# Restrict the free competitions to the Premier League 2003/2004 season and
# fetch its matches. Filtering on the season as well as the competition keeps
# the result stable if more Premier League seasons are added to the free data.
premier_league_matches <- data_freecomp %>%
  filter(
    competition_name == "Premier League",
    season_name == "2003/2004"
  ) %>%
  FreeMatches()## [1] "Whilst we are keen to share data and facilitate research, we also urge you to be responsible with the data. Please register your details on https://www.statsbomb.com/resource-centre and read our User Agreement carefully."
glimpse(premier_league_matches)## Rows: 33
## Columns: 38
## $ match_id <int> 3749052, 3749522, 3749246, 3749257, 374~
## $ match_date <chr> "2004-02-07", "2003-12-26", "2004-03-28~
## $ kick_off <chr> "16:00:00.000", "13:00:00.000", "17:05:~
## $ home_score <int> 1, 3, 1, 2, 2, 0, 1, 1, 2, 0, 0, 3, 2, ~
## $ away_score <int> 3, 0, 1, 1, 1, 4, 4, 2, 1, 2, 2, 2, 1, ~
## $ match_status <chr> "available", "available", "available", ~
## $ match_status_360 <chr> "scheduled", "scheduled", "scheduled", ~
## $ last_updated <chr> "2020-07-29T05:00", "2020-07-29T05:00",~
## $ last_updated_360 <chr> "2021-06-13T16:17:31.694", "2021-06-13T~
## $ match_week <int> 2, 18, 30, 38, 27, 1, 1, 1, 9, 2, 2, 7,~
## $ competition.competition_id <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ~
## $ competition.country_name <chr> "England", "England", "England", "Engla~
## $ competition.competition_name <chr> "Premier League", "Premier League", "Pr~
## $ season.season_id <int> 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,~
## $ season.season_name <chr> "2003/2004", "2003/2004", "2003/2004", ~
## $ home_team.home_team_id <int> 46, 1, 1, 1, 1, 47, 101, 24, 1, 98, 59,~
## $ home_team.home_team_name <chr> "Wolverhampton Wanderers", "Arsenal", "~
## $ home_team.home_team_gender <chr> "male", "male", "male", "male", "male",~
## $ home_team.home_team_group <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ home_team.managers <list> [<data.frame[1 x 6]>], [<data.frame[1 ~
## $ home_team.country.id <int> 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,~
## $ home_team.country.name <chr> "England", "England", "England", "Engla~
## $ away_team.away_team_id <int> 1, 46, 39, 22, 75, 1, 1, 1, 33, 1, 1, 3~
## $ away_team.away_team_name <chr> "Arsenal", "Wolverhampton Wanderers", "~
## $ away_team.away_team_gender <chr> "male", "male", "male", "male", "male",~
## $ away_team.away_team_group <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,~
## $ away_team.managers <list> [<data.frame[1 x 6]>], [<data.frame[1 ~
## $ away_team.country.id <int> 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,~
## $ away_team.country.name <chr> "England", "England", "England", "Engla~
## $ metadata.data_version <chr> "1.1.0", "1.1.0", "1.1.0", "1.1.0", "1.~
## $ metadata.shot_fidelity_version <chr> "2", "2", "2", "2", "2", "2", "2", "2",~
## $ metadata.xy_fidelity_version <chr> "2", "2", "2", "2", "2", "2", "2", "2",~
## $ competition_stage.id <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ competition_stage.name <chr> "Regular Season", "Regular Season", "Re~
## $ stadium.id <int> 217, 1000243, 1000243, 1000243, 1000243~
## $ stadium.name <chr> "Molineux Stadium", "Highbury Stadium",~
## $ stadium.country.id <int> 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,~
## $ stadium.country.name <chr> "England", "England", "England", "Engla~
To pull the events data for the Premier League 2003/2004 season above, we use StatsBombFreeEvents().
# Download the event data for every match in premier_league_matches.
# The warning printed below is raised by a length > 1 `if` condition on
# `MatchesDF` inside the call.
# NOTE(review): on R >= 4.2 such a condition is an error rather than a warning,
# so this call may need a newer StatsBombR release - confirm.
pl_20032004_events <- premier_league_matches %>% StatsBombFreeEvents(Parallel = TRUE)## [1] "Whilst we are keen to share data and facilitate research, we also urge you to be responsible with the data. Please register your details on https://www.statsbomb.com/resource-centre and read our User Agreement carefully."
## Warning in if (MatchesDF == "ALL") {: the condition has length > 1 and only the
## first element will be used
StatsBomb provides the allclean() function to organize the events data into a more usable state.
pl_20032004_events_clean <- pl_20032004_events %>% allclean() ## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = "id"
## Joining, by = c("period", "match_id")
pl_20032004_events_clean %>% glimpse()## Rows: 112,773
## Columns: 180
## $ id <chr> "3b20dc57-5cb7-46a8-bccb-b09d984478d7~
## $ index <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12~
## $ period <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ timestamp <chr> "00:00:00.000", "00:00:00.000", "00:0~
## $ minute <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ second <int> 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 4, 4, 8~
## $ possession <int> 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2~
## $ duration <dbl> 0.000000, 0.000000, 0.000000, 0.00000~
## $ related_events <list> <NULL>, <NULL>, "691cd9cc-9e81-4ba1-~
## $ off_camera <lgl> NA, NA, NA, NA, TRUE, NA, NA, NA, NA,~
## $ location <list> <NULL>, <NULL>, <NULL>, <NULL>, <61.~
## $ under_pressure <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ counterpress <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ out <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ type.id <int> 35, 35, 18, 18, 30, 42, 30, 42, 43, 3~
## $ type.name <chr> "Starting XI", "Starting XI", "Half S~
## $ possession_team.id <int> 46, 46, 46, 46, 1, 1, 1, 1, 1, 1, 1, ~
## $ possession_team.name <chr> "Wolverhampton Wanderers", "Wolverham~
## $ play_pattern.id <int> 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9~
## $ play_pattern.name <chr> "Regular Play", "Regular Play", "Regu~
## $ team.id <int> 46, 1, 1, 46, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ team.name <chr> "Wolverhampton Wanderers", "Arsenal",~
## $ tactics.formation <int> 442, 442, NA, NA, NA, NA, NA, NA, NA,~
## $ tactics.lineup <list> [<data.frame[11 x 5]>], [<data.frame~
## $ player.id <int> NA, NA, NA, NA, 15516, 15042, 15042, ~
## $ player.name <chr> NA, NA, NA, NA, "Thierry Henry", "Den~
## $ position.id <int> NA, NA, NA, NA, 24, 22, 22, 11, 11, 1~
## $ position.name <chr> NA, NA, NA, NA, "Left Center Forward"~
## $ pass.length <dbl> NA, NA, NA, NA, 3.522783, NA, 15.3375~
## $ pass.angle <dbl> NA, NA, NA, NA, 1.4570043, NA, -2.931~
## $ pass.end_location <list> <NULL>, <NULL>, <NULL>, <NULL>, <61.~
## $ pass.aerial_won <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.switch <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.cross <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.assisted_shot_id <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.goal_assist <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.shot_assist <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.outswinging <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.deflected <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.straight <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.inswinging <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.cut_back <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.through_ball <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.miscommunication <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.no_touch <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.recipient.id <int> NA, NA, NA, NA, 15042, NA, 26014, NA,~
## $ pass.recipient.name <chr> NA, NA, NA, NA, "Dennis Bergkamp", NA~
## $ pass.height.id <int> NA, NA, NA, NA, 1, NA, 1, NA, NA, 1, ~
## $ pass.height.name <chr> NA, NA, NA, NA, "Ground Pass", NA, "G~
## $ pass.body_part.id <int> NA, NA, NA, NA, 40, NA, 40, NA, NA, 3~
## $ pass.body_part.name <chr> NA, NA, NA, NA, "Right Foot", NA, "Ri~
## $ pass.type.id <int> NA, NA, NA, NA, 65, NA, NA, NA, NA, N~
## $ pass.type.name <chr> NA, NA, NA, NA, "Kick Off", NA, NA, N~
## $ pass.outcome.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.outcome.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.technique.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ pass.technique.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ carry.end_location <list> <NULL>, <NULL>, <NULL>, <NULL>, <NUL~
## $ ball_receipt.outcome.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ ball_receipt.outcome.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ clearance.left_foot <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ clearance.right_foot <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ clearance.head <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ clearance.aerial_won <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ clearance.body_part.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ clearance.body_part.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ ball_recovery.recovery_failure <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ ball_recovery.offensive <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ dribble.overrun <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ dribble.nutmeg <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ dribble.outcome.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ dribble.outcome.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ duel.outcome.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ duel.outcome.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ duel.type.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ duel.type.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ foul_committed.offensive <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ foul_committed.advantage <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ foul_committed.type.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ foul_committed.type.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ foul_committed.card.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ foul_committed.card.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ foul_won.defensive <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ foul_won.advantage <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ interception.outcome.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ interception.outcome.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.statsbomb_xg <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.end_location <list> <NULL>, <NULL>, <NULL>, <NULL>, <NUL~
## $ shot.key_pass_id <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.first_time <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.freeze_frame <list> <NULL>, <NULL>, <NULL>, <NULL>, <NUL~
## $ shot.aerial_won <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.open_goal <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.body_part.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.body_part.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.type.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.type.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.outcome.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.outcome.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.technique.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.technique.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.end_location <list> <NULL>, <NULL>, <NULL>, <NULL>, <NUL~
## $ goalkeeper.technique.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.technique.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.position.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.position.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.type.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.type.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.outcome.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.outcome.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.body_part.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.body_part.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ miscontrol.aerial_won <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ block.offensive <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ bad_behaviour.card.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ bad_behaviour.card.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ substitution.outcome.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ substitution.outcome.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ substitution.replacement.id <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ substitution.replacement.name <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ match_id <int> 3749052, 3749052, 3749052, 3749052, 3~
## $ competition_id <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2~
## $ season_id <int> 44, 44, 44, 44, 44, 44, 44, 44, 44, 4~
## $ shot.deflected <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.one_on_one <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ block.deflection <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ `50_50.outcome.id` <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ `50_50.outcome.name` <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.punched_out <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.success_in_play <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.lost_out <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ foul_committed.penalty <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ foul_won.penalty <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.saved_off_target <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.shot_saved_off_target <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ dribble.no_touch <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ half_start.late_video_start <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ block.save_block <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.follows_dribble <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ clearance.other <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ goalkeeper.shot_saved_to_post <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.redirect <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ injury_stoppage.in_chain <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ location.x <dbl> NA, NA, NA, NA, 61.0, 61.4, 61.9, 46.~
## $ location.y <dbl> NA, NA, NA, NA, 40.1, 43.6, 44.0, 40.~
## $ carry.end_location.x <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 46.9,~
## $ carry.end_location.y <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 39.1,~
## $ pass.end_location.x <dbl> NA, NA, NA, NA, 61.4, NA, 46.9, NA, N~
## $ pass.end_location.y <dbl> NA, NA, NA, NA, 43.6, NA, 40.8, NA, N~
## $ shot.end_location.x <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.end_location.y <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot.end_location.z <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ shot_impact_height <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ player.name.GK <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ player.id.GK <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ location.x.GK <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ location.y.GK <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ DistToGoal <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ DistToKeeper <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ AngleToGoal <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ AngleToKeeper <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ AngleDeviation <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ avevelocity <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ DistSGK <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ density <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ density.incone <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ distance.ToD1 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ distance.ToD2 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ AttackersBehindBall <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ DefendersBehindBall <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ DefendersInCone <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ InCone.GK <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ DefArea <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ distance.ToD1.360 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ distance.ToD2.360 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ milliseconds <dbl> 0, 0, 0, 0, 675, 333, 390, 494, 494, ~
## $ ElapsedTime <dbl> 0.000, 0.000, 0.000, 0.000, 0.675, 1.~
## $ StartOfPossession <dbl> 0.000, 0.000, 0.000, 0.000, 0.675, 0.~
## $ TimeInPoss <dbl> 0.000, 0.000, 0.000, 0.000, 0.000, 0.~
## $ TimeToPossEnd <dbl> 0.000, 0.000, 0.000, 0.000, 15.626, 1~
Let’s take a look at shots and goals.
# Per-team shot and goal totals across every event in the cleaned data.
# shots_per_goal is shots / goals and goal_rate is goals / shots. A team with
# no goals gives shots / 0 = Inf, which is replaced by 0 so the column stays
# finite (0 is a placeholder there, not a real ratio).
shots_goals <- pl_20032004_events_clean %>%
  group_by(team.name) %>%
  summarise(
    shots = sum(type.name == "Shot", na.rm = TRUE),
    goals = sum(shot.outcome.name == "Goal", na.rm = TRUE),
    # shots and goals are already one value per team, so no extra sum() needed.
    shots_per_goal = shots / goals,
    goal_rate = goals / shots
  ) %>%
  # Only numeric columns can hold Inf; team.name is left untouched.
  mutate(across(
    .cols = where(is.numeric),
    .fns = ~ ifelse(is.infinite(.), 0, .)
  ))
slice_min(shots_goals, order_by = shots)## # A tibble: 1 x 5
## team.name shots goals shots_per_goal goal_rate
## <chr> <int> <int> <dbl> <dbl>
## 1 Leicester City 2 1 2 0.5
slice_min(shots_goals, order_by = goals)## # A tibble: 5 x 5
## team.name shots goals shots_per_goal goal_rate
## <chr> <int> <int> <dbl> <dbl>
## 1 Aston Villa 13 0 0 0
## 2 Birmingham City 11 0 0 0
## 3 Blackburn Rovers 17 0 0 0
## 4 Fulham 24 0 0 0
## 5 Southampton 5 0 0 0
slice_min(shots_goals, order_by = shots_per_goal)## # A tibble: 5 x 5
## team.name shots goals shots_per_goal goal_rate
## <chr> <int> <int> <dbl> <dbl>
## 1 Aston Villa 13 0 0 0
## 2 Birmingham City 11 0 0 0
## 3 Blackburn Rovers 17 0 0 0
## 4 Fulham 24 0 0 0
## 5 Southampton 5 0 0 0
slice_min(shots_goals, order_by = goal_rate)## # A tibble: 5 x 5
## team.name shots goals shots_per_goal goal_rate
## <chr> <int> <int> <dbl> <dbl>
## 1 Aston Villa 13 0 0 0
## 2 Birmingham City 11 0 0 0
## 3 Blackburn Rovers 17 0 0 0
## 4 Fulham 24 0 0 0
## 5 Southampton 5 0 0 0
slice_max(shots_goals, order_by = goals)## # A tibble: 1 x 5
## team.name shots goals shots_per_goal goal_rate
## <chr> <int> <int> <dbl> <dbl>
## 1 Arsenal 465 62 7.5 0.133
slice_max(shots_goals, order_by = shots)## # A tibble: 1 x 5
## team.name shots goals shots_per_goal goal_rate
## <chr> <int> <int> <dbl> <dbl>
## 1 Arsenal 465 62 7.5 0.133
slice_max(shots_goals, order_by = goal_rate)## # A tibble: 1 x 5
## team.name shots goals shots_per_goal goal_rate
## <chr> <int> <int> <dbl> <dbl>
## 1 Leicester City 2 1 2 0.5
I feel like there is something weird in the tables above. Let’s visualize the goals data to see it more clearly.
# Horizontal bar chart of goals per team, ordered by goals. The top scorer's
# bar is redrawn with a dashed green outline; it is selected by value rather
# than by row position, so the highlight does not depend on row order.
ggplot(shots_goals, aes(y = reorder(team.name, goals), x = goals)) +
  geom_col(aes(fill = goals), show.legend = TRUE) +
  labs(x = "Goals Scored") +
  scale_fill_gradient(low = "grey", high = "blue") +
  geom_col(
    data = slice_max(shots_goals, order_by = goals, n = 1, with_ties = FALSE),
    fill = "royalblue",
    col = "green", lwd = 2, linetype = 2
  ) +
  theme_minimal() +
  theme(axis.title.y = element_blank())
Arsenal led by TOO HUGE a gap, and some clubs didn’t record a single goal.
That can’t be right.
There’s something afoot here. Let’s see the number of matches in this data.
n_distinct(premier_league_matches$match_id)## [1] 33
Whoa, only 33 matches?
Last time I checked, each football club has to participate in 38 matches. But this EPL 2003/2004 data has only 33 matches, and it focuses only on Arsenal.
After looking into this a bit more, I’ve found that:
StatsBomb intended this portion of the Premier League data as the Invincibles Project: a look at this historic season through a modern lens.
When it was released, this article explained that the StatsBomb team had had trouble acquiring the matches and had only managed to get 32 of them. So there is a one-match difference between the data I pulled at the time of writing this article and the data that was originally released.
So what do I do? Even though there is still a lot of data here, I think it’s hard to make comparisons between teams. I will focus on Arsenal’s performance and players.
Before we continue to identify what tasks to pursue, I want to see the events data grouped by match_id.
# Goals scored by each team in each match: one row per (match_id, team).
# `.groups = "drop_last"` spells out the grouping summarise() would otherwise
# choose by default (the result stays grouped by match_id), so no message is
# emitted.
ars_fixture_results <- pl_20032004_events_clean %>%
  group_by(match_id, team.name) %>%
  summarise(
    goals = sum(shot.outcome.name == "Goal", na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  rename(perspective_team_name = team.name)
ars_fixture_results## # A tibble: 66 x 3
## # Groups: match_id [33]
## match_id perspective_team_name goals
## <int> <chr> <int>
## 1 3749052 Arsenal 3
## 2 3749052 Wolverhampton Wanderers 1
## 3 3749068 Arsenal 2
## 4 3749068 Tottenham Hotspur 2
## 5 3749079 Arsenal 2
## 6 3749079 Chelsea 1
## 7 3749133 Arsenal 2
## 8 3749133 Aston Villa 0
## 9 3749153 Arsenal 3
## 10 3749153 Middlesbrough 1
## # ... with 56 more rows
I see: while the matches data covers Arsenal’s 33 matches, the events data records the detailed events from both teams’ perspectives. That’s why there are two perspectives under each match_id, hence the 66 rows.
I can split the data frame into the two perspectives.
# Events performed by Arsenal only; the result is grouped by match and team.
ars_events_perspective <- group_by(
  filter(pl_20032004_events_clean, team.name == "Arsenal"),
  match_id, team.name
)
# Events performed by Arsenal's opponents; grouped by match and team.
foe_events_perspective <- group_by(
  filter(pl_20032004_events_clean, team.name != "Arsenal"),
  match_id, team.name
)
the Invincibles was a magnificent team. That Arsenal squad went on their 2003/2004 season without a defeat. How did they do that? First I’ll look into the general stats of the 33 matches data available, and then I’ll pick a match as a study case.
# Arsenal's home fixtures in date order, with the result from Arsenal's side.
# goal_diff is home_score - away_score, i.e. Arsenal's margin in these rows.
# A defeat is labelled "lost" instead of silently falling into "draw".
home_res <- premier_league_matches %>%
  select(
    match_id, match_date, home_team.home_team_name,
    away_team.away_team_name, home_score, away_score
  ) %>%
  filter(home_team.home_team_name == "Arsenal") %>%
  mutate(
    goal_diff = home_score - away_score,
    result = case_when(
      home_score > away_score ~ "won",
      home_score < away_score ~ "lost",
      home_score == away_score ~ "draw"
    )
  ) %>%
  arrange(match_date)
home_res## # A tibble: 17 x 8
## match_id match_date home_team.home_te~ away_team.away_~ home_score away_score
## <int> <chr> <chr> <chr> <int> <int>
## 1 3749493 2003-08-16 Arsenal Everton 2 1
## 2 3749296 2003-09-13 Arsenal Portsmouth 1 1
## 3 3749528 2003-09-26 Arsenal Newcastle United 3 2
## 4 3749079 2003-10-18 Arsenal Chelsea 2 1
## 5 3749196 2003-11-08 Arsenal Tottenham Hotsp~ 2 1
## 6 3749192 2003-11-30 Arsenal Fulham 0 0
## 7 3749360 2003-12-14 Arsenal Blackburn Rovers 1 0
## 8 3749522 2003-12-26 Arsenal Wolverhampton W~ 3 0
## 9 3749153 2004-01-10 Arsenal Middlesbrough 4 1
## 10 3749233 2004-02-01 Arsenal Manchester City 2 1
## 11 3749642 2004-02-28 Arsenal Charlton Athlet~ 2 1
## 12 3749403 2004-03-20 Arsenal Bolton Wanderers 2 1
## 13 3749246 2004-03-28 Arsenal Manchester Unit~ 1 1
## 14 3749448 2004-04-09 Arsenal Liverpool 4 2
## 15 3749453 2004-04-16 Arsenal Leeds United 5 0
## 16 3749462 2004-05-01 Arsenal Birmingham City 0 0
## 17 3749257 2004-05-15 Arsenal Leicester City 2 1
## # ... with 2 more variables: goal_diff <int>, result <chr>
# Arsenal's away fixtures in date order. goal_diff stays home minus away, so a
# negative value is an Arsenal lead (bigwin_away relies on this via slice_min).
away_res <- premier_league_matches %>%
  select(match_id, match_date, home_team.home_team_name,
         away_team.away_team_name, home_score, away_score) %>%
  # Select on the away side explicitly; "home team is not Arsenal" would also
  # let through any match that does not involve Arsenal at all.
  filter(away_team.away_team_name == "Arsenal") %>%
  mutate(
    goal_diff = home_score - away_score,
    # Three-way outcome: the previous two-way if_else() labelled any defeat
    # "draw". Missing scores still give NA because no branch matches them.
    result = case_when(
      home_score < away_score ~ "won",
      home_score > away_score ~ "lost",
      home_score == away_score ~ "draw"
    )
  ) %>%
  arrange(match_date)
away_res## # A tibble: 16 x 8
## match_id match_date home_team.home_te~ away_team.away_~ home_score away_score
## <int> <chr> <chr> <chr> <int> <int>
## 1 3749358 2003-08-24 Middlesbrough Arsenal 0 4
## 2 3749454 2003-08-31 Manchester City Arsenal 1 2
## 3 3749552 2003-09-21 Manchester United Arsenal 0 0
## 4 3749253 2003-10-04 Liverpool Arsenal 1 2
## 5 3749526 2003-10-26 Charlton Athletic Arsenal 1 1
## 6 3749346 2003-11-01 Leeds United Arsenal 1 4
## 7 3749434 2003-11-22 Birmingham City Arsenal 0 3
## 8 3749278 2003-12-20 Bolton Wanderers Arsenal 1 1
## 9 3749310 2003-12-29 Southampton Arsenal 0 1
## 10 3749133 2004-01-18 Aston Villa Arsenal 0 2
## 11 3749052 2004-02-07 Wolverhampton Wan~ Arsenal 1 3
## 12 3749276 2004-02-21 Chelsea Arsenal 1 2
## 13 3749465 2004-03-13 Blackburn Rovers Arsenal 0 2
## 14 3749431 2004-04-11 Newcastle United Arsenal 0 0
## 15 3749068 2004-04-25 Tottenham Hotspur Arsenal 2 2
## 16 3749603 2004-05-09 Fulham Arsenal 0 1
## # ... with 2 more variables: goal_diff <int>, result <chr>
# Count results per venue, tag each count table with its venue, then stack the
# two tables and plot them as stacked bars.
a_ho <- as.data.frame(table(home_res$result))
# The venue column is sized from the table instead of a hard-coded pair, so it
# still fits if the number of distinct results changes.
a_ho$ven <- rep("home", nrow(a_ho))
a_ho
## Var1 Freq ven
## 1 draw 4 home
## 2 won 13 home
a_aw <- as.data.frame(table(away_res$result))
a_aw$ven <- rep("away", nrow(a_aw))
a_aw
## Var1 Freq ven
## 1 draw 5 away
## 2 won 11 away
res <- rbind(a_ho, a_aw)
ggplot(res, aes(x = reorder(ven, Freq), y = Freq)) +
  geom_col(aes(fill = Var1)) +
  scale_fill_discrete(labels = c("Draw", "Won")) +
  labs(title = "The Invincibles Streak",
       subtitle = "At Home and Away",
       x = "Match Location",
       y = "Number of Matches",
       fill = "Result")
Arsenal won more at home in this incomplete dataset.
# Home fixture with the largest goal difference.
bigwin_home <- slice_max(home_res, order_by = goal_diff)
bigwin_home## # A tibble: 1 x 8
## match_id match_date home_team.home_tea~ away_team.away_~ home_score away_score
## <int> <chr> <chr> <chr> <int> <int>
## 1 3749453 2004-04-16 Arsenal Leeds United 5 0
## # ... with 2 more variables: goal_diff <int>, result <chr>
# Away fixture with the smallest (most negative) home-minus-away difference.
bigwin_away <- slice_min(away_res, order_by = goal_diff)
bigwin_away## # A tibble: 1 x 8
## match_id match_date home_team.home_tea~ away_team.away_~ home_score away_score
## <int> <chr> <chr> <chr> <int> <int>
## 1 3749358 2003-08-24 Middlesbrough Arsenal 0 4
## # ... with 2 more variables: goal_diff <int>, result <chr>
Arsenal won 5 - 0 at home against Leeds United, and crushed Middlesbrough 0 - 4 in an away match.
# Matches that finished level: keep the core match columns, add the goal
# difference, and retain the rows where both sides scored the same.
draw_res <- premier_league_matches %>%
  select(
    match_id, match_date,
    home_team.home_team_name, away_team.away_team_name,
    home_score, away_score
  ) %>%
  mutate(goal_diff = home_score - away_score) %>%
  filter(home_score == away_score)
draw_res## # A tibble: 9 x 7
## match_id match_date home_team.home_tea~ away_team.away_~ home_score away_score
## <int> <chr> <chr> <chr> <int> <int>
## 1 3749246 2004-03-28 Arsenal Manchester Unit~ 1 1
## 2 3749462 2004-05-01 Arsenal Birmingham City 0 0
## 3 3749552 2003-09-21 Manchester United Arsenal 0 0
## 4 3749296 2003-09-13 Arsenal Portsmouth 1 1
## 5 3749068 2004-04-25 Tottenham Hotspur Arsenal 2 2
## 6 3749192 2003-11-30 Arsenal Fulham 0 0
## 7 3749278 2003-12-20 Bolton Wanderers Arsenal 1 1
## 8 3749526 2003-10-26 Charlton Athletic Arsenal 1 1
## 9 3749431 2004-04-11 Newcastle United Arsenal 0 0
## # ... with 1 more variable: goal_diff <int>
# One row per drawn match with the opponent named. `venue` holds the name of
# the column the opponent came from, so "away_team..." means the opponent was
# the away side.
draw_res_longer <- draw_res %>%
  pivot_longer(
    cols = c(home_team.home_team_name, away_team.away_team_name),
    names_to = "venue",
    values_to = "opponent_team_name"
  ) %>%
  filter(opponent_team_name != "Arsenal") %>%
  select(match_id, match_date, opponent_team_name,
         home_score, away_score, goal_diff, venue)
draw_res_longer## # A tibble: 9 x 7
## match_id match_date opponent_team_name home_score away_score goal_diff venue
## <int> <chr> <chr> <int> <int> <int> <chr>
## 1 3749246 2004-03-28 Manchester United 1 1 0 away_t~
## 2 3749462 2004-05-01 Birmingham City 0 0 0 away_t~
## 3 3749552 2003-09-21 Manchester United 0 0 0 home_t~
## 4 3749296 2003-09-13 Portsmouth 1 1 0 away_t~
## 5 3749068 2004-04-25 Tottenham Hotspur 2 2 0 home_t~
## 6 3749192 2003-11-30 Fulham 0 0 0 away_t~
## 7 3749278 2003-12-20 Bolton Wanderers 1 1 0 home_t~
## 8 3749526 2003-10-26 Charlton Athletic 1 1 0 home_t~
## 9 3749431 2004-04-11 Newcastle United 0 0 0 home_t~
table(draw_res_longer$opponent_team_name)##
## Birmingham City Bolton Wanderers Charlton Athletic Fulham
## 1 1 1 1
## Manchester United Newcastle United Portsmouth Tottenham Hotspur
## 2 1 1 1
draw_res_longer %>% filter(opponent_team_name=="Manchester United")## # A tibble: 2 x 7
## match_id match_date opponent_team_name home_score away_score goal_diff venue
## <int> <chr> <chr> <int> <int> <int> <chr>
## 1 3749246 2004-03-28 Manchester United 1 1 0 away_t~
## 2 3749552 2003-09-21 Manchester United 0 0 0 home_t~
In this incomplete data, we can see that the Invincibles failed to beat Manchester United twice: once at home and once away.
# Per-match goals and summed xG for Arsenal, joined to the match table and
# labelled with venue, opponent and a performance band.
ars_match_xg <- ars_events_perspective %>%
  select(match_id, shot.outcome.name, shot.statsbomb_xg) %>%
  group_by(match_id) %>%
  # TRUE is spelled out: T is an ordinary variable that can be reassigned.
  summarise(goals = sum(shot.outcome.name == "Goal", na.rm = TRUE),
            xg_per_match = sum(shot.statsbomb_xg, na.rm = TRUE)) %>%
  left_join(premier_league_matches, by = "match_id") %>%
  mutate(
    # Team id 1 is treated as Arsenal throughout this block.
    venue = case_when(home_team.home_team_id == 1 ~ "Home",
                      away_team.away_team_id == 1 ~ "Away"),
    # Goals more than 20% above / below the match xG count as over- /
    # underperformance; anything in between is "Performed".
    offense_rate = case_when(
      goals > (xg_per_match + xg_per_match * 0.2) ~ "Overperformed",
      goals < (xg_per_match - xg_per_match * 0.2) ~ "Underperformed",
      TRUE ~ "Performed"
    ),
    opponent = case_when(home_team.home_team_id != 1 ~ home_team.home_team_name,
                         away_team.away_team_id != 1 ~ away_team.away_team_name)
  ) %>%
  select(match_id, match_date, goals, xg_per_match, opponent, offense_rate, venue) %>%
  arrange(match_date)
## Adding missing grouping variables: `team.name`
ars_match_xg## # A tibble: 33 x 7
## match_id match_date goals xg_per_match opponent offense_rate venue
## <int> <chr> <int> <dbl> <chr> <chr> <chr>
## 1 3749493 2003-08-16 2 2.45 Everton Performed Home
## 2 3749358 2003-08-24 4 2.88 Middlesbrough Overperformed Away
## 3 3749454 2003-08-31 2 1.21 Manchester City Overperformed Away
## 4 3749296 2003-09-13 1 2.36 Portsmouth Underperformed Home
## 5 3749552 2003-09-21 0 0.235 Manchester United Underperformed Away
## 6 3749528 2003-09-26 3 2.28 Newcastle United Overperformed Home
## 7 3749253 2003-10-04 1 0.702 Liverpool Overperformed Away
## 8 3749079 2003-10-18 2 1.64 Chelsea Overperformed Home
## 9 3749526 2003-10-26 1 0.419 Charlton Athletic Overperformed Away
## 10 3749346 2003-11-01 4 2.53 Leeds United Overperformed Away
## # ... with 23 more rows
# Goals per opponent, shaded by match xG, in one panel per venue and
# performance band.
ggplot(ars_match_xg, aes(x = goals, y = opponent, fill = xg_per_match)) +
  geom_col() +
  facet_wrap(~venue + offense_rate) +
  scale_fill_gradient(low = "white", high = "maroon") +
  theme_classic() +
  labs(y = "Opponents",
       x = "Goals Scored",
       title = "The Invincibles Goalscoring Feat") +
  theme(panel.grid.major = element_line(color = "purple",
                                        size = 0.2),
        plot.title = element_text(face = "bold", hjust = 0.5))
The Invincibles often overperformed their expected goals. They underperformed more often at home than in away games, but those were very rare occasions.
# Arsenal events whose shot outcome is a goal, trimmed to the columns used by
# the scoring summaries and the pitch plot.
ars_goal_shot_event <- ars_events_perspective %>%
  filter(shot.outcome.name == "Goal") %>%
  select(match_id, player.name, position.name, location, under_pressure,
         shot.aerial_won, shot.follows_dribble, shot.first_time,
         shot.open_goal, shot.statsbomb_xg, shot.deflected,
         shot.technique.name, shot.body_part.name, shot.type.name,
         type.name, DistToGoal, shot.outcome.name,
         location.x, location.y)
## Adding missing grouping variables: `team.name`
# Goals broken down by scorer, technique, body part and shot type, with counts
# of the situational flags and the average distance to goal.
who_scored_how <- ars_goal_shot_event %>%
  group_by(player.name,
           shot.technique.name,
           shot.body_part.name,
           shot.type.name,
           type.name) %>%
  summarise(goals = sum(shot.outcome.name == "Goal"),
            shots = sum(type.name == "Shot", na.rm = TRUE),
            shot_xg = sum(shot.statsbomb_xg),
            # na.rm = TRUE is essential for the flag counts: without it, a
            # single NA in a group makes the sum NA, which the mutate_if()
            # below turns into 0, so a group mixing TRUE and NA lost its count.
            # The table printed further down was rendered before this fix.
            shot_under_pressure = sum(under_pressure == TRUE, na.rm = TRUE),
            shot_aerial_won = sum(shot.aerial_won == TRUE, na.rm = TRUE),
            shot_follows_dribble = sum(shot.follows_dribble == TRUE, na.rm = TRUE),
            shot_first_time = sum(shot.first_time == TRUE, na.rm = TRUE),
            shot_open_goal = sum(shot.open_goal == TRUE, na.rm = TRUE),
            avg_dist = mean(DistToGoal)) %>%
  # Any remaining NA in a numeric column is shown as 0.
  mutate_if(is.numeric, ~replace(., is.na(.), 0))
## `summarise()` has grouped output by 'player.name', 'shot.technique.name',
## 'shot.body_part.name', 'shot.type.name'. You can override using the `.groups`
## argument.
## `mutate_if()` ignored the following grouping variables:
print(as.data.frame(who_scored_how))## player.name shot.technique.name shot.body_part.name
## 1 Dennis Bergkamp Half Volley Right Foot
## 2 Dennis Bergkamp Lob Right Foot
## 3 Dennis Bergkamp Normal Right Foot
## 4 Eduardo César Daude Gaspar Normal Left Foot
## 5 Eduardo César Daude Gaspar Normal Left Foot
## 6 Fredrik Ljungberg Normal Right Foot
## 7 Gilberto Aparecido da Silva Half Volley Left Foot
## 8 Gilberto Aparecido da Silva Normal Head
## 9 Gilberto Aparecido da Silva Volley Right Foot
## 10 José Antonio Reyes Calderón Normal Right Foot
## 11 Kolo Habib Touré Normal Head
## 12 Patrick Vieira Normal Left Foot
## 13 Patrick Vieira Normal Right Foot
## 14 Robert Pirès Half Volley Left Foot
## 15 Robert Pirès Half Volley Right Foot
## 16 Robert Pirès Normal Left Foot
## 17 Robert Pirès Normal Right Foot
## 18 Sylvain Wiltord Normal Left Foot
## 19 Sylvain Wiltord Normal Right Foot
## 20 Sylvain Wiltord Volley Right Foot
## 21 Thierry Henry Half Volley Left Foot
## 22 Thierry Henry Lob Right Foot
## 23 Thierry Henry Normal Left Foot
## 24 Thierry Henry Normal Other
## 25 Thierry Henry Normal Right Foot
## 26 Thierry Henry Normal Right Foot
## 27 Thierry Henry Normal Right Foot
## shot.type.name type.name goals shots shot_xg shot_under_pressure
## 1 Open Play Shot 2 2 0.67049544 0
## 2 Open Play Shot 1 1 0.42512512 0
## 3 Open Play Shot 1 1 0.22693571 1
## 4 Free Kick Shot 1 1 0.05423032 0
## 5 Open Play Shot 1 1 0.18986110 0
## 6 Open Play Shot 4 4 1.24778873 0
## 7 Open Play Shot 1 1 0.08142420 0
## 8 Open Play Shot 1 1 0.21590783 0
## 9 Open Play Shot 1 1 0.12615536 0
## 10 Open Play Shot 1 1 0.91854423 0
## 11 Open Play Shot 1 1 0.72327920 0
## 12 Open Play Shot 1 1 0.28374390 0
## 13 Open Play Shot 2 2 1.43965975 0
## 14 Open Play Shot 2 2 0.34451984 0
## 15 Open Play Shot 2 2 0.71944238 0
## 16 Open Play Shot 3 3 1.40253565 0
## 17 Open Play Shot 7 7 2.12707875 0
## 18 Open Play Shot 1 1 0.34743246 0
## 19 Open Play Shot 1 1 0.90684265 0
## 20 Open Play Shot 1 1 0.33687192 0
## 21 Open Play Shot 1 1 0.39777723 0
## 22 Penalty Shot 2 2 1.52000000 0
## 23 Open Play Shot 4 4 1.19304814 0
## 24 Open Play Shot 2 2 1.10208023 0
## 25 Free Kick Shot 3 3 0.20517991 0
## 26 Open Play Shot 10 10 2.97710134 0
## 27 Penalty Shot 5 5 3.80000000 0
## shot_aerial_won shot_follows_dribble shot_first_time shot_open_goal
## 1 0 0 2 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 1 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## 7 0 0 1 0
## 8 0 0 0 0
## 9 0 0 1 0
## 10 0 0 0 1
## 11 0 0 0 1
## 12 0 0 1 0
## 13 0 0 0 0
## 14 0 0 2 0
## 15 0 0 2 0
## 16 0 0 3 0
## 17 0 0 0 0
## 18 0 0 1 0
## 19 0 0 1 1
## 20 0 0 1 0
## 21 0 0 1 1
## 22 0 0 0 0
## 23 0 0 0 0
## 24 0 0 0 0
## 25 0 0 0 0
## 26 0 0 0 0
## 27 0 0 0 0
## avg_dist
## 1 12.292548
## 2 16.471187
## 3 11.269871
## 4 27.802878
## 5 9.682975
## 6 15.680826
## 7 13.121738
## 8 5.714018
## 9 14.904026
## 10 8.072174
## 11 2.500000
## 12 14.402778
## 13 9.102849
## 14 11.819394
## 15 9.438034
## 16 11.269017
## 17 15.035854
## 18 7.940403
## 19 4.609772
## 20 8.381527
## 21 5.280152
## 22 12.051890
## 23 13.700081
## 24 8.507591
## 25 25.380190
## 26 16.630579
## 27 12.000997
# Goals, summed xG and average shot distance per scorer, most goals first.
# ars_goal_shot_event only holds goals, so total_goals counts its rows per player.
ars_goals_tally <-
  ars_goal_shot_event %>%
  group_by(player.name) %>%
  summarise(total_goals = sum(shot.outcome.name == "Goal"),
            total_shot_xg = sum(shot.statsbomb_xg),
            avg_dist = mean(DistToGoal)) %>%
  arrange(desc(total_goals))
# Top scorer.
ars_goals_tally %>% slice_max(total_goals)
## # A tibble: 1 x 4
## player.name total_goals total_shot_xg avg_dist
## <chr> <int> <dbl> <dbl>
## 1 Thierry Henry 27 11.2 15.0
# Goals per scorer as bars, with each player's summed xG overlaid as a dotted
# white segment. Only the first row of the tally (sorted by goals) is labelled.
ggplot(ars_goals_tally, aes(x = total_goals, y = reorder(player.name, total_goals))) +
  geom_col(fill = "purple", color = "maroon") +
  geom_segment(aes(x = 0, xend = total_shot_xg, yend = player.name),
               linetype = 3,
               size = 2,
               col = "white") +
  geom_label(data = ars_goals_tally[1, ],
             aes(y = player.name,
                 x = total_shot_xg + 0.3,
                 label = paste(" sum xg of", round(total_shot_xg))),
             size = 3) +
  # NULL is the documented way to drop an axis title; element_blank() is a
  # theme element, not a label.
  labs(y = NULL,
       x = "Goals Scored",
       title = "The Invincibles Goalscorers Tally") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
Henry leads the tally, far exceeding his total expected goals, with Pires about halfway behind him and the rest clustered at the far end.
# get player minutes played
player_minutes <- get.minutesplayed(pl_20032004_events_clean)
## Joining, by = "id"
## Joining, by = "match_id"
# Sum the minutes per player first, then attach each player's name once.
# Joining the minutes onto the full event table instead repeats every minutes
# row once per event of that player, which inflated the totals (nineties in
# the thousands). The per-90 table printed further down was rendered before
# this fix.
player_minutes <- player_minutes %>%
  group_by(player.id) %>%
  summarise(minutes = sum(MinutesPlayed, na.rm = TRUE)) %>%
  left_join(
    distinct(ungroup(pl_20032004_events_clean), player.id, player.name),
    by = "player.id"
  ) %>%
  select(player.name, minutes)
# get player shots
player_shots <- pl_20032004_events_clean %>%
  group_by(player.name, player.id) %>%
  summarise(shots = sum(type.name == "Shot", na.rm = TRUE))
## `summarise()` has grouped output by 'player.name'. You can override using the
## `.groups` argument.
# Per-90 rates for each scorer: 90-minute units played, then shots and goals
# per unit, ordered by shots per 90.
ars_goals_tally_expanded <- ars_goals_tally %>%
  left_join(player_minutes, by = "player.name") %>%
  left_join(player_shots, by = "player.name") %>%
  mutate(nineties = minutes / 90) %>%
  mutate(shots_per90 = shots / nineties,
         goals_per90 = total_goals / nineties) %>%
  select(player.name, shots_per90, total_goals, goals_per90, nineties) %>%
  arrange(desc(shots_per90))
ars_goals_tally_expanded## # A tibble: 10 x 5
## player.name shots_per90 total_goals goals_per90 nineties
## <chr> <dbl> <int> <dbl> <dbl>
## 1 "José Antonio Reyes Calderón" 0.00243 1 0.000174 5752.
## 2 "Sylvain Wiltord" 0.00203 3 0.000468 6406.
## 3 "Thierry Henry" 0.000872 27 0.000134 201869.
## 4 "Dennis Bergkamp" 0.000648 4 0.0000603 66335.
## 5 "Fredrik Ljungberg" 0.000563 4 0.0000523 76414.
## 6 "Eduardo César Daude Gaspar" 0.000437 2 0.0000398 50288.
## 7 "Robert Pirès " 0.000300 14 0.0000858 163242.
## 8 "Gilberto Aparecido da Silva" 0.000179 3 0.0000224 133911.
## 9 "Patrick Vieira" 0.000111 3 0.0000195 153730.
## 10 "Kolo Habib Touré" 0.0000940 1 0.00000723 138287.
# Shots per 90 per scorer as bars, with goals per 90 overlaid as a dotted white
# segment. Only the first row (highest shots per 90) is labelled.
ggplot(ars_goals_tally_expanded, aes(x = shots_per90, y = reorder(player.name, shots_per90))) +
  geom_col(fill = "purple", color = "maroon") +
  geom_segment(aes(x = 0, xend = goals_per90, yend = player.name),
               linetype = 3,
               size = 2,
               col = "white") +
  geom_label(data = ars_goals_tally_expanded[1, ],
             aes(y = player.name,
                 x = goals_per90 + 0.00025,
                 label = paste(" GoalsPer90 of", round(goals_per90, digits = 4))),
             size = 3) +
  # NULL is the documented way to drop an axis title; element_blank() is a
  # theme element, not a label.
  labs(y = NULL,
       x = "Shots per 90",
       title = "The Invincibles Shots per 90 Tally") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
Reyes wasted many shots, with too few of them resulting in goals.
# The goal with the smallest xG: sort ascending by xG and keep the first row.
lowest_xg <- head(arrange(ars_goal_shot_event, shot.statsbomb_xg), 1)
lowest_xg## # A tibble: 1 x 20
## # Groups: match_id, team.name [1]
## team.name match_id player.name position.name location under_pressure
## <chr> <int> <chr> <chr> <list> <lgl>
## 1 Arsenal 3749233 Thierry Henry Right Center Forward <dbl [2]> TRUE
## # ... with 14 more variables: shot.aerial_won <lgl>,
## # shot.follows_dribble <lgl>, shot.first_time <lgl>, shot.open_goal <lgl>,
## # shot.statsbomb_xg <dbl>, shot.deflected <lgl>, shot.technique.name <chr>,
## # shot.body_part.name <chr>, shot.type.name <chr>, type.name <chr>,
## # DistToGoal <dbl>, shot.outcome.name <chr>, location.x <dbl>,
## # location.y <dbl>
# Pitch map of the goal with the lowest xG (lowest_xg holds a single row).
ggplot(lowest_xg) +
  annotate_pitch(dimensions = pitch_statsbomb, # we can change dimension to fit statsbomb size
                 colour = "white",
                 fill = "black",
                 limits = FALSE) +
  geom_point(aes(x = location.x, # goal attributes
                 y = location.y),
             color = "red",
             size = 4) +
  coord_flip(xlim = c(80, 120), # flip vertical
             ylim = c(0, 80)) +
  labs(color = NULL,
       shape = NULL,
       title = "Goal with The Lowest XG") +
  # Each text layer used to pass both lwd and size. In the ggplot2 version that
  # rendered this report both standardise to `size`, which raised the
  # "Duplicated aesthetics" warning. One size is kept per layer: the value that
  # was listed first (the lwd one), presumably the one actually applied.
  # NOTE(review): confirm the label sizes visually.
  geom_text(mapping = aes(x = location.x, y = location.y, label = player.name),
            position = position_jitter(width = 5, seed = 999),
            color = "green",
            size = 4) +
  geom_label(mapping = aes(x = location.x, y = location.y, label = paste(" xg of", shot.statsbomb_xg)),
             position = position_jitter(width = 15, seed = 100),
             color = "black",
             size = 5) +
  theme(legend.position = "right",
        plot.title = element_text(face = "bold", hjust = 0.5),
        plot.subtitle = element_text(face = "bold", hjust = 0.5)) +
  theme_pitch()
who_scored_how %>% arrange(shot_xg) %>% filter(shot.type.name=="Open Play") %>% head(1)## # A tibble: 1 x 14
## # Groups: player.name, shot.technique.name, shot.body_part.name,
## # shot.type.name [1]
## player.name shot.technique.~ shot.body_part.~ shot.type.name type.name goals
## <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Gilberto Apa~ Half Volley Left Foot Open Play Shot 1
## # ... with 8 more variables: shots <dbl>, shot_xg <dbl>,
## # shot_under_pressure <dbl>, shot_aerial_won <dbl>,
## # shot_follows_dribble <dbl>, shot_first_time <dbl>, shot_open_goal <dbl>,
## # avg_dist <dbl>
# Goal assists broken down by passer, body part, technique and pass type, with
# the average pass length and counts of the pass flags.
who_assist_how <- ars_events_perspective %>%
  group_by(player.name,
           pass.body_part.name,
           pass.technique.name,
           pass.type.name) %>%
  filter(pass.goal_assist == TRUE) %>%
  summarise(
    avg_pass_length = mean(pass.length),
    # na.rm = TRUE is essential for the flag counts: without it, a single NA in
    # a group makes the sum NA, which the mutate_if() below turns into 0, so a
    # group mixing TRUE and NA lost its count. The table printed further down
    # was rendered before this fix.
    pass.aerial_won = sum(pass.aerial_won == TRUE, na.rm = TRUE),
    pass.cut_back = sum(pass.cut_back == TRUE, na.rm = TRUE),
    pass.switch = sum(pass.switch == TRUE, na.rm = TRUE),
    pass.cross = sum(pass.cross == TRUE, na.rm = TRUE),
    pass.shot_assist = sum(pass.shot_assist == TRUE, na.rm = TRUE),
    pass.goal_assist = sum(pass.goal_assist == TRUE, na.rm = TRUE)
  ) %>%
  # Any remaining NA in a numeric column is shown as 0.
  mutate_if(is.numeric, ~replace(., is.na(.), 0))
## `summarise()` has grouped output by 'player.name', 'pass.body_part.name',
## 'pass.technique.name'. You can override using the `.groups` argument.
## `mutate_if()` ignored the following grouping variables:
who_assist_how## # A tibble: 25 x 11
## # Groups: player.name, pass.body_part.name, pass.technique.name [23]
## player.name pass.body_part.~ pass.technique.~ pass.type.name avg_pass_length
## <chr> <chr> <chr> <chr> <dbl>
## 1 Ashley Cole Left Foot <NA> <NA> 24.1
## 2 Dennis Berg~ Left Foot <NA> <NA> 20.3
## 3 Dennis Berg~ Right Foot Through Ball <NA> 23.5
## 4 Dennis Berg~ Right Foot <NA> <NA> 5.46
## 5 Eduardo Cés~ Left Foot Through Ball <NA> 32.7
## 6 Eduardo Cés~ Left Foot <NA> <NA> 9.30
## 7 Fredrik Lju~ Right Foot Through Ball <NA> 17.9
## 8 Fredrik Lju~ Right Foot <NA> <NA> 15.7
## 9 Gilberto Ap~ Right Foot Through Ball <NA> 35.7
## 10 Gilberto Ap~ Right Foot <NA> Recovery 12.8
## # ... with 15 more rows, and 6 more variables: pass.aerial_won <dbl>,
## # pass.cut_back <dbl>, pass.switch <dbl>, pass.cross <dbl>,
## # pass.shot_assist <dbl>, pass.goal_assist <dbl>
# Assist tally per player. Each Arsenal event is matched to the shot (if any)
# that names it as its key pass; ".a" columns belong to the event itself and
# ".b" columns to the matched shot.
ars_assists_tally <-
  ars_events_perspective %>%
  # The stray trailing comma after `by` (an empty extra argument) is removed.
  left_join(ars_events_perspective,
            suffix = c(".a", ".b"),
            by = c("id" = "shot.key_pass_id")) %>%
  group_by(player.name.a) %>%
  summarise(
    assists = sum(pass.goal_assist.a == TRUE, na.rm = TRUE),
    # Averaged over every event of the player that has a pass length, not only
    # the assists.
    avg_dist = mean(pass.length.a, na.rm = TRUE),
    # Summed xG of all shots that followed the player's key passes.
    xGA = sum(shot.statsbomb_xg.b, na.rm = TRUE)
  ) %>%
  filter(assists != 0) %>%
  arrange(desc(assists))
ars_assists_tally## # A tibble: 13 x 4
## player.name.a assists avg_dist xGA
## <chr> <int> <dbl> <dbl>
## 1 "Thierry Henry" 6 19.2 9.00
## 2 "Dennis Bergkamp" 5 19.4 4.77
## 3 "Robert Pirès " 5 19.9 5.35
## 4 "Patrick Vieira" 4 19.3 2.91
## 5 "Ashley Cole" 3 17.1 1.76
## 6 "Fredrik Ljungberg" 3 17.1 2.67
## 7 "Gilberto Aparecido da Silva" 3 18.7 1.51
## 8 "Eduardo César Daude Gaspar" 2 19.8 1.55
## 9 "José Antonio Reyes Calderón" 2 16.6 0.713
## 10 "Nwankwo Christian Kanu" 2 17.2 0.314
## 11 "Kolo Habib Touré" 1 22.5 0.568
## 12 "Laureano Bisan-Etame Mayer" 1 18.5 2.20
## 13 "Sulzeer Jeremiah \"Sol\" Campbell" 1 23.2 0.0814
# Assists per player as bars, with each player's summed xGA overlaid as a
# dotted purple segment. Only the first row of the tally is labelled.
ggplot(ars_assists_tally, aes(x = assists, y = reorder(player.name.a, assists))) +
  geom_col(fill = "white", color = "maroon") +
  geom_segment(aes(x = 0, xend = xGA, yend = player.name.a),
               linetype = 3,
               size = 2,
               col = "purple") +
  geom_label(data = ars_assists_tally[1, ],
             aes(y = player.name.a,
                 x = xGA + 0.5,
                 label = paste(" sum xGA of", round(xGA, digits = 2))),
             size = 3) +
  # NULL is the documented way to drop an axis title; element_blank() is a
  # theme element, not a label.
  labs(y = NULL,
       x = "Total Assists",
       title = "The Invincibles Assists Tally") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
  xlim(0, 10)
It seems that Henry was not only a clinical goalscorer; he was also
prolific at creating chances for others, with the highest expected goal
assists (xGA) in the squad.
As we saw earlier, Arsenal won 5 - 0 against Leeds United.
bigwin_home## # A tibble: 1 x 8
## match_id match_date home_team.home_tea~ away_team.away_~ home_score away_score
## <int> <chr> <chr> <chr> <int> <int>
## 1 3749453 2004-04-16 Arsenal Leeds United 5 0
## # ... with 2 more variables: goal_diff <int>, result <chr>
Now we’ll look at the events data to see how the Invincibles destroyed Leeds United.
# filter events data for the matches against Leeds
vs_leeds <- ars_events_perspective %>%
  # Columns are picked by name rather than by position (previously 1 and 4), so
  # a change in bigwin_home's column order cannot select the wrong ones.
  left_join(bigwin_home[, c("match_id", "away_team.away_team_name")], by = "match_id") %>%
  filter(away_team.away_team_name == "Leeds United")
# Line-ups for the same match.
lineups_vs_leeds <- get.lineupsFree(Match = bigwin_home[,1])
## [1] "Whilst we are keen to share data and facilitate research, we also urge you to be responsible with the data. Please register your details on https://www.statsbomb.com/resource-centre and read our User Agreement carefully."
## Warning: Unknown or uninitialised column: `competition.competition_id`.
## Warning: Unknown or uninitialised column: `season.season_id`.
# Row 1 / row 2 of the third column hold the Leeds / Arsenal line-up tables.
lineups_leeds <- lineups_vs_leeds[[1,3]]
lineups_arsenal <- lineups_vs_leeds[[2,3]]
lineups_leeds[2:4]
## player_name player_nickname jersey_number
## 1 James Philip Milner James Milner 38
## 2 Matthew Kilgallon <NA> 36
## 3 Aaron Lennon <NA> 25
## 4 Scott Carson <NA> 40
## 5 Nick Barmby <NA> 7
## 6 Jermaine Pennant <NA> 11
## 7 Dominic Matteo <NA> 21
## 8 Paul Robinson <NA> 1
## 9 Alan Smith <NA> 17
## 10 Gary Oliver Kelly Gary Kelly 2
## 11 Ian Harte <NA> 3
## 12 Steven Caldwell <NA> 15
## 13 Lucas Radebe <NA> 5
## 14 Michael Duberry <NA> 22
## 15 Simon Johnson <NA> 39
## 16 Mark Viduka <NA> 9
lineups_arsenal[2:4]## player_name player_nickname jersey_number
## 1 Dennis Bergkamp <NA> 10
## 2 Sylvain Wiltord <NA> 11
## 3 Patrick Vieira <NA> 4
## 4 Thierry Henry <NA> 14
## 5 Sulzeer Jeremiah "Sol" Campbell Sol Campbell 23
## 6 Martin Keown <NA> 5
## 7 Robert Pirès <NA> 7
## 8 Jens Lehmann <NA> 1
## 9 Gaël Clichy <NA> 22
## 10 José Antonio Reyes Calderón José Antonio Reyes 9
## 11 Ray Parlour <NA> 15
## 12 Eduardo César Daude Gaspar Edu Gaspar 17
## 13 Kolo Habib Touré Kolo Touré 28
## 14 Gilberto Aparecido da Silva Gilberto Silva 19
## 15 Laureano Bisan-Etame Mayer Lauren 12
## 16 Graham Stack <NA> 33
# Arsenal's defensive actions against Leeds (pressures, tackles, fouls
# committed, interceptions and blocks), passed through soccerTransform().
da_vs_leeds_transformed <- vs_leeds %>%
  filter(type.name %in% c("Pressure", "Foul Committed", "Interception", "Block") |
           duel.type.name == "Tackle") %>%
  soccerTransform(method = "statsbomb")
# Heatmap of those defensive actions.
plot_def <- soccerHeatmap(df = da_vs_leeds_transformed,
                          x = "location.x",
                          y = "location.y",
                          xBins = 21,
                          yBins = 19,
                          title = "The Invincibles vs Leeds United",
                          subtitle = "Defensive Action Heatmap",
                          kde = TRUE,
                          arrow = "r")
plot_def
The Invincibles tended to sit back and defend in a low block.
# Pair every Arsenal goal against Leeds with the pass recorded as its key pass
# (".goal" columns describe the shot, ".assist" columns the pass).
vs_leeds_score_assist <- left_join(
  filter(vs_leeds, shot.outcome.name == "Goal"),
  vs_leeds,
  by = c("shot.key_pass_id" = "id"),
  suffix = c(".goal", ".assist")
) %>%
  select(player.name.goal, play_pattern.name.goal, shot.type.name.goal,
         shot.statsbomb_xg.goal, minute.goal, shot.technique.name.goal,
         location.x.goal, location.y.goal,
         player.name.assist, pass.technique.name.assist,
         location.x.assist, location.y.assist)
# Label positions for the assisters; rows with any missing value are dropped.
assister_coord <- vs_leeds_score_assist %>%
  transmute(assister = player.name.assist,
            x = location.x.assist,
            y = location.y.assist) %>%
  drop_na()
# Label positions for the scorers.
goal_coord <- vs_leeds_score_assist %>%
  transmute(goal = player.name.goal,
            x = location.x.goal,
            y = location.y.goal) %>%
  drop_na()
vs_leeds_score_assist %>% select(player.name.goal, shot.type.name.goal, player.name.assist) ## # A tibble: 5 x 3
## player.name.goal shot.type.name.goal player.name.assist
## <chr> <chr> <chr>
## 1 "Robert Pirès " Open Play "Dennis Bergkamp"
## 2 "Thierry Henry" Open Play "Gilberto Aparecido da Silva"
## 3 "Thierry Henry" Penalty <NA>
## 4 "Thierry Henry" Open Play "Gilberto Aparecido da Silva"
## 5 "Thierry Henry" Open Play "Robert Pirès "
# ggsoccer defaults to Opta pitch dimensions, so the StatsBomb dimensions are
# passed explicitly.
plot_att <- ggplot(vs_leeds_score_assist) +
  annotate_pitch(dimensions = pitch_statsbomb, # we can change dimension to fit statsbomb size
                 colour = "white",
                 fill = "steelblue4",
                 limits = FALSE) +
  geom_segment(aes(x = location.x.assist, # assist arrow
                   y = location.y.assist,
                   xend = location.x.goal,
                   yend = location.y.goal),
               arrow = arrow(length = unit(0.15, "cm"),
                             type = "closed"),
               col = "black",
               # The penalty has no assist, so its segment has missing
               # coordinates; drop it without the "Removed 1 rows" warning.
               na.rm = TRUE) +
  geom_point(aes(x = location.x.goal, # goal attributes
                 y = location.y.goal,
                 color = shot.type.name.goal,
                 shape = play_pattern.name.goal),
             size = 4) +
  coord_flip(xlim = c(40, 120),
             ylim = c(0, 80)) +
  labs(color = NULL,
       shape = NULL,
       title = "The Invincibles vs Leeds United",
       subtitle = "5 - 0") +
  geom_label(data = assister_coord,
             mapping = aes(x = x, y = y, label = assister)) +
  # This layer used to pass both lwd and size. In the ggplot2 version that
  # rendered this report both standardise to `size`, which raised the
  # "Duplicated aesthetics" warning. One size is kept: the value that was
  # listed first (the lwd one), presumably the one actually applied.
  # NOTE(review): confirm the label size visually.
  geom_text(data = goal_coord,
            mapping = aes(x = x, y = y, label = goal),
            position = position_jitter(height = 4, width = 5, seed = 10),
            color = "green",
            size = 4) +
  theme(legend.position = "left",
        plot.title = element_text(face = "bold", hjust = 0.5),
        plot.subtitle = element_text(face = "bold", hjust = 0.5)) +
  theme_pitch() +
  theme(panel.background = element_rect(fill = "steelblue4"))
plot_att
This was a quick glance at the incomplete data from the Invincibles era. Thierry Henry was dominant: not only did he bombard his opponents with goals, he was also a creative outlet for others to score. When we looked into one of their biggest wins, against Leeds United, we saw that the Invincibles held a low block defensively and launched direct, vertical passes that cut through the middle of the pitch, scoring from various play patterns such as counters, free kicks, throw-ins, regular play, and other (in this case, a penalty).