Data source

https://en.wikipedia.org/wiki/UEFA_Euro_2020_squads

https://theanalyst.com/eu/2021/06/euro-2020-player-stats/

Libraries and data import

# NOTE(review): `sr` is not referenced in the visible part of this file —
# confirm it is still needed.
sr <- TRUE
library(readxl)
library(dplyr)
library(stringr)
library(forcats)
library(RColorBrewer)
library(ggplot2)
library(ggridges)
library(cluster)
library(ggdendro)
library(wrapr)

Data processing

Stage assignment

Factors reordering

Goals and xG edit

The source statistics do not include penalty shots in the xG factor. Here it is assumed that each penalty shot adds 0.65 to the xG factor.

Caps and Goals

Teams

# Boxplot of international caps per team for every squad member. Teams are
# placed on the x axis in order of their median caps; boxes are filled by the
# `Stage` column.
df %>%
  mutate(Team = fct_reorder(Team, Caps, .fun = median)) %>%
  ggplot(mapping = aes(x = Team, y = Caps, fill = Stage)) +
  geom_boxplot() +
  labs(
    title = "EURO 2020 - Boxplot of international caps by team, sort by median",
    subtitle = "All players, as of 2021-06-10",
    caption = podpis,
    x = "",
    y = "Caps"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 13, face = "bold"),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )

# Same caps-by-team boxplot, restricted to each team's 10 players with the
# most minutes (`mins.played.x`); top_n() keeps ties, so a team can contribute
# more than 10 rows.
df %>%
  group_by(Team) %>%
  top_n(n = 10, wt = mins.played.x) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, Caps, .fun = median)) %>%
  ggplot(mapping = aes(x = Team, y = Caps, fill = Stage)) +
  geom_boxplot() +
  labs(
    title = "EURO 2020 - Boxplot of international caps by team",
    subtitle = "Top 10 players by minutes played, sort by median, as of 2021-06-10",
    caption = podpis,
    x = "",
    y = "Caps"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 13, face = "bold"),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )

Numbers

Caps

# Boxplot of international caps grouped by the `Nr` column (one box per
# number, in factor-level order — the boxes are NOT sorted by median).
# The y axis shows Caps, so it is labelled accordingly (it previously read
# "Height", copied from the height plot), and the title no longer claims a
# median ordering the code does not perform.
ggplot(data = df, aes(x = as.factor(Nr), y = Caps)) +
  geom_boxplot(fill = "grey") +
  labs(
    title = "Euro 2020 - Boxplot of Caps by number",
    subtitle = "All players",
    caption = podpis
  ) +
  xlab("") +
  ylab("Caps") +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", size = 16),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )

Goals

# Boxplot of international goals grouped by the `Nr` column (one box per
# number, in factor-level order — the boxes are NOT sorted by median).
# The y axis shows Goals, so it is labelled accordingly (it previously read
# "Height", copied from the height plot), and the title no longer claims a
# median ordering the code does not perform.
ggplot(data = df, aes(x = as.factor(Nr), y = Goals)) +
  geom_boxplot(fill = "grey") +
  labs(
    title = "Euro 2020 - Boxplot of Goals by number",
    subtitle = "All players",
    caption = podpis
  ) +
  xlab("") +
  ylab("Goals") +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", size = 16),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )

Players

# Scatter of caps against birth date, coloured by position, with player names
# printed just below the points (overlapping labels are dropped).
df %>%
  # Keep players with at least 1 minute in either minutes column. The
  # comparison is `>= 1` so that players with exactly one minute are kept, as
  # the subtitle states (the previous `> 1` silently excluded them).
  filter(mins.played.x >= 1 | mins.played.y >= 1) %>%
  ggplot(aes(x = Birth.date, y = Caps)) +
  geom_point(aes(color = Position)) +
  scale_color_brewer(palette = "Set1") +
  geom_text(aes(label = Name), check_overlap = TRUE, nudge_y = -2.5, size = 3) +
  labs(
    title = "EURO 2020 - Caps by Birth Date",
    subtitle = "Players with at least 1 minute played, as of 2021-06-10",
    caption = podpis
  ) +
  xlab("Birth date") +
  ylab("Caps") +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", hjust = 1, size = 12),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )

# Caps against birth date with one loess trend line per position; points,
# trend lines and name labels share the position colour.
df %>%
  # Keep players with at least 1 minute in either minutes column. `>= 1`
  # matches the subtitle; the previous `> 1` dropped players with exactly one
  # minute.
  filter(mins.played.x >= 1 | mins.played.y >= 1) %>%
  ggplot(aes(x = Birth.date, y = Caps)) +
  geom_point(aes(color = Position)) +
  geom_smooth(aes(group = Position, color = Position),
              method = "loess", se = FALSE, size = 1) +
  geom_text(aes(label = Name, color = Position),
            check_overlap = TRUE, nudge_y = -2.0, size = 3) +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "Caps by birth date",
    subtitle = "Player with at least 1 minute played",
    caption = podpis
  ) +
  xlab("Birth date") +
  ylab("Caps") +
  # theme_minimal() is a complete theme and replaces every earlier theme()
  # setting, so it must come BEFORE the bold-axis tweaks (it used to come
  # last, which discarded them).
  theme_minimal() +
  theme(
    axis.text = element_text(face = "bold", size = 10),
    axis.title = element_text(face = "bold")
  )
## `geom_smooth()` using formula 'y ~ x'

# International goals against caps for all outfield players, with one
# straight-line (glm) fit per position.
df %>%
  filter(Position != "GK") %>% # exclude goalkeepers
  ggplot(mapping = aes(x = Caps, y = Goals)) +
  geom_point(mapping = aes(color = Position)) +
  geom_smooth(
    mapping = aes(group = Position, color = Position),
    method = "glm",
    se = FALSE,
    size = 1
  ) +
  geom_text(
    mapping = aes(label = Name, color = Position),
    check_overlap = TRUE,
    nudge_y = -1.0,
    size = 3
  ) +
  scale_color_brewer(palette = "Set1") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 10, face = "bold"),
    axis.title = element_text(face = "bold")
  ) +
  labs(
    title = "International goals vs. caps",
    subtitle = "No goalkeepers",
    caption = podpis
  )
## `geom_smooth()` using formula 'y ~ x'

# Same goals-vs-caps scatter as a single-column stack of panels, one panel
# per position.
df %>%
  filter(Position != "GK") %>% # exclude goalkeepers
  ggplot(mapping = aes(x = Caps, y = Goals)) +
  geom_point(mapping = aes(color = Position)) +
  facet_wrap(vars(Position), ncol = 1) +
  geom_smooth(
    mapping = aes(group = Position, color = Position),
    method = "glm",
    se = FALSE,
    size = 1
  ) +
  geom_text(
    mapping = aes(label = Name, color = Position),
    check_overlap = TRUE,
    nudge_y = -1.0,
    size = 3
  ) +
  scale_color_brewer(palette = "Set1") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 10, face = "bold"),
    axis.title = element_text(face = "bold")
  ) +
  labs(
    title = "International goals vs. caps",
    subtitle = "No goalkeepers",
    caption = podpis
  )
## `geom_smooth()` using formula 'y ~ x'

# Straight-line fit of international goals on caps for forwards (glm() with
# its default gaussian family). Its coefficients label the plot below.
FWGoalsCaps <- glm(Goals ~ Caps, data = df[df$Position == "FW", ])

# Forwards only: goals against caps with the fitted line and its equation,
# all drawn in one fixed colour.
df %>%
  filter(Position == "FW") %>% # forwards
  ggplot(aes(x = Caps, y = Goals)) +
  # The constant `color` already overrode the former aes(color = Position)
  # mapping, so that mapping is dropped; the rendering is unchanged.
  geom_point(color = "skyblue4", show.legend = FALSE) +
  geom_smooth(aes(group = Position), color = "skyblue4",
              method = "glm", se = FALSE, size = 1, show.legend = FALSE) +
  geom_text(aes(label = Name), color = "skyblue4",
            check_overlap = TRUE, nudge_y = -1.0, size = 4,
            show.legend = FALSE) +
  labs(
    title = "EURO2020 - International goals vs. caps",
    subtitle = "Forwards, as of 2021-06-10",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", hjust = 1, size = 13),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  # Equation label. The intercept is printed with an explicit "+" or "-";
  # previously a positive intercept was appended with no operator at all
  # ("... * Caps 1.234"), which is not a readable equation.
  annotate(
    geom = "text", x = 30, y = 75,
    label = sprintf(
      "Goals = %.3f * Caps %s %.3f",
      coef(FWGoalsCaps)[[2]],
      if (isTRUE(coef(FWGoalsCaps)[[1]] < 0)) "-" else "+",
      abs(coef(FWGoalsCaps)[[1]])
    ),
    color = "skyblue4", size = 7
  )
## `geom_smooth()` using formula 'y ~ x'

# Straight-line fit of international goals on caps for midfielders (glm()
# with its default gaussian family). Its coefficients label the plot below.
MFGoalsCaps <- glm(Goals ~ Caps, data = df[df$Position == "MF", ])

# Midfielders only: goals against caps with the fitted line and its equation,
# all drawn in one fixed colour.
df %>%
  filter(Position == "MF") %>% # midfielders
  ggplot(aes(x = Caps, y = Goals)) +
  geom_point(color = "forestgreen", show.legend = FALSE) +
  geom_smooth(aes(group = Position), color = "forestgreen",
              method = "glm", se = FALSE, size = 1, show.legend = FALSE) +
  geom_text(aes(label = Name), color = "forestgreen",
            check_overlap = TRUE, nudge_y = -0.45, size = 4,
            show.legend = FALSE) +
  labs(
    title = "EURO2020 - International goals vs. caps",
    subtitle = "Midfielders, as of 2021-06-10",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", hjust = 1, size = 13),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  # Equation label. The intercept is printed with an explicit "+" or "-";
  # previously a positive intercept was appended with no operator at all
  # ("... * Caps 1.234"), which is not a readable equation.
  annotate(
    geom = "text", x = 30, y = 25,
    label = sprintf(
      "Goals = %.3f * Caps %s %.3f",
      coef(MFGoalsCaps)[[2]],
      if (isTRUE(coef(MFGoalsCaps)[[1]] < 0)) "-" else "+",
      abs(coef(MFGoalsCaps)[[1]])
    ),
    color = "forestgreen", size = 7
  )
## `geom_smooth()` using formula 'y ~ x'

# Straight-line fit of international goals on caps for defenders (glm() with
# its default gaussian family). Its coefficients label the plot below.
DFGoalsCaps <- glm(Goals ~ Caps, data = df[df$Position == "DF", ])

# Defenders only: goals against caps with the fitted line and its equation,
# all drawn in one fixed colour.
df %>%
  filter(Position == "DF") %>% # defenders
  ggplot(aes(x = Caps, y = Goals)) +
  geom_point(color = "red", show.legend = FALSE) +
  geom_smooth(aes(group = Position), color = "red",
              method = "glm", se = FALSE, size = 1, show.legend = FALSE) +
  geom_text(aes(label = Name), color = "red",
            check_overlap = TRUE, nudge_y = -0.3, size = 4,
            show.legend = FALSE) +
  labs(
    title = "EURO2020 - International goals vs. caps",
    subtitle = "Defenders, as of 2021-06-10",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", hjust = 1, size = 13),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  # Equation label. The intercept is printed with an explicit "+" or "-";
  # previously a positive intercept was appended with no operator at all
  # ("... * Caps 1.234"), which is not a readable equation.
  annotate(
    geom = "text", x = 30, y = 10,
    label = sprintf(
      "Goals = %.3f * Caps %s %.3f",
      coef(DFGoalsCaps)[[2]],
      if (isTRUE(coef(DFGoalsCaps)[[1]] < 0)) "-" else "+",
      abs(coef(DFGoalsCaps)[[1]])
    ),
    color = "red", size = 7
  )
## `geom_smooth()` using formula 'y ~ x'

Height

Height by Teams

# Boxplot of player height per team for every squad member; teams are ordered
# on the x axis by median height and boxes are filled by the `Stage` column.
df %>%
  mutate(Team = fct_reorder(Team, Height, .fun = median)) %>%
  ggplot(mapping = aes(x = Team, y = Height, fill = Stage)) +
  geom_boxplot() +
  labs(
    title = "Euro 2020 - Boxplot of players height by team, sort by median",
    subtitle = "All players",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 13, face = "bold"),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )

# Height-by-team boxplot restricted to each team's 10 players with the most
# minutes (`mins.played.x`); top_n() keeps ties.
df %>%
  group_by(Team) %>%
  top_n(n = 10, wt = mins.played.x) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, Height, .fun = median)) %>%
  ggplot(mapping = aes(x = Team, y = Height, fill = Stage)) +
  geom_boxplot() +
  labs(
    title = "Boxplot of players height by team",
    subtitle = "Top 10 players by minutes played, sort by median",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 13, face = "bold"),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )

Height by number

# Boxplot of player height grouped by the `Nr` column (one box per number, in
# factor-level order). The boxes are not sorted by median, so the title no
# longer claims they are.
ggplot(data = df, aes(x = as.factor(Nr), y = Height)) +
  geom_boxplot(fill = "grey") +
  labs(
    title = "Euro 2020 - Boxplot of players height by number",
    subtitle = "All players",
    caption = podpis
  ) +
  xlab("") +
  ylab("Height") +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", size = 16),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )

Height by Position

# Overlaid, semi-transparent density curves of player height, one per
# position.
ggplot(data = df) +
  geom_density(
    mapping = aes(x = Height, fill = Position, color = Position),
    size = 1,
    alpha = 0.3
  ) +
  labs(
    title = "Euro 2020 - Players height histogram by Position",
    subtitle = "All players",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 16, face = "bold"),
    axis.title.x = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 13, face = "bold"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )

# Ridgeline view of the same height distributions, one ridge per position.
ggplot(data = df) +
  geom_density_ridges(
    mapping = aes(x = Height, y = Position, fill = Position),
    scale = 2,
    show.legend = FALSE
  ) +
  theme_ridges()
## Picking joint bandwidth of 1.84

Tallest players

# Bar chart of the 20 largest `Height` values (ties kept by top_n()), bars
# ordered from tallest to shortest and labelled with the height value.
df %>%
  top_n(n = 20, wt = Height) %>%
  mutate(Name = fct_reorder(Name, desc(Height))) %>%
  ggplot(mapping = aes(x = Name, y = Height, fill = Position)) +
  geom_col() +
  labs(
    title = "Highest players",
    caption = podpis,
    x = "",
    y = "Height (cm)"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 15),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  geom_text(mapping = aes(label = Height), size = 4, nudge_y = 3.5)

# Tallest 10 players (ties kept) within each position, one facet per
# position, bars filled by team.
#
# The previous version called fct_reorder() on `Position` while the data was
# grouped by `Position`: inside a group that column has a single value, so the
# reorder had no effect on the plot (and modifying a grouping column is
# rejected outright by dplyr releases before 1.0). What the chart needs is the
# player names ordered by height, as in the ungrouped "tallest players" chart,
# so the rows are ungrouped and `Name` is reordered instead.
df %>%
  group_by(Position) %>%
  top_n(n = 10, wt = Height) %>%
  ungroup() %>%
  mutate(Name = fct_reorder(Name, desc(Height))) %>%
  ggplot(aes(x = Name, y = Height, fill = Team)) +
  geom_bar(stat = "identity") +
  # Free scales so each facet only shows its own players on the x axis.
  facet_wrap(vars(Position), ncol = 1, scales = "free") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, face = "bold", hjust = 1, size = 12),
    axis.text.y = element_text(face = "bold", size = 12)
  ) +
  geom_text(aes(label = Height), size = 4, nudge_y = 3.5)

Shortest players

# Bar chart of the 20 smallest `Height` values (a negative `n` makes top_n()
# select from the bottom; ties kept), bars ordered from shortest upwards.
df %>%
  top_n(n = -20, wt = Height) %>%
  mutate(Name = fct_reorder(Name, Height)) %>%
  ggplot(mapping = aes(x = Name, y = Height, fill = Position)) +
  geom_col() +
  labs(
    title = "Shortest players",
    caption = podpis,
    x = "",
    y = "Height (cm)"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 15),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  geom_text(mapping = aes(label = Height), size = 4, nudge_y = 3.5)

# Shortest 10 players (ties kept) within each position, one facet per
# position, bars filled by team.
#
# The previous version called fct_reorder() on `Position` while the data was
# grouped by `Position`: inside a group that column has a single value, so the
# reorder had no effect on the plot (and modifying a grouping column is
# rejected outright by dplyr releases before 1.0). What the chart needs is the
# player names ordered by height, as in the ungrouped "shortest players"
# chart, so the rows are ungrouped and `Name` is reordered instead.
df %>%
  group_by(Position) %>%
  top_n(n = -10, wt = Height) %>%
  ungroup() %>%
  mutate(Name = fct_reorder(Name, Height)) %>%
  ggplot(aes(x = Name, y = Height, fill = Team)) +
  geom_bar(stat = "identity") +
  # Free scales so each facet only shows its own players on the x axis.
  facet_wrap(vars(Position), ncol = 1, scales = "free") +
  labs(
    title = "Shortest players by Position",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, face = "bold", hjust = 1, size = 12),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(size = 18)
  ) +
  geom_text(aes(label = Height), size = 4, nudge_y = 4)

Expected Goals

Players

GLM Models

# Straight-line fits of xGr on minutes played, one per outfield position.
# For each fit, `...1` holds the intercept and `...2` the slope.

# Defenders
glmxGDF <- glm(xGr ~ mins.played.x, data = df[df$Position == "DF", ])
glmxGDF1 <- coef(glmxGDF)[1]
glmxGDF2 <- coef(glmxGDF)[2]

# Midfielders
glmxGMF <- glm(xGr ~ mins.played.x, data = df[df$Position == "MF", ])
glmxGMF1 <- coef(glmxGMF)[1]
glmxGMF2 <- coef(glmxGMF)[2]

# Forwards
glmxGFW <- glm(xGr ~ mins.played.x, data = df[df$Position == "FW", ])
glmxGFW1 <- coef(glmxGFW)[1]
glmxGFW2 <- coef(glmxGFW)[2]
# xGr against minutes played for outfield players, with a straight-line fit
# per position and player names just below the points.
df %>%
  filter(Position != "GK") %>%
  mutate(xGr = as.numeric(xGr)) %>%
  ggplot(mapping = aes(x = mins.played.x, y = xGr)) +
  geom_point(mapping = aes(color = Position), na.rm = TRUE) +
  geom_text(
    mapping = aes(label = Name),
    check_overlap = TRUE,
    nudge_y = -0.05,
    size = 3.5
  ) +
  geom_smooth(
    mapping = aes(group = Position, color = Position),
    method = "glm",
    se = FALSE
  ) +
  labs(
    title = "Euro 2020 - Expected goals by minutes played",
    subtitle = "All players",
    caption = podpis,
    x = "Minutes played",
    y = "Expected goals factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  )

# xGr against minutes played for outfield players who actually played
# (mins.played.x > 0), one panel per position with a straight-line fit.
df %>%
  filter(Position != "GK" & mins.played.x > 0) %>%
  mutate(xGr = as.numeric(xGr)) %>%
  ggplot(aes(x = mins.played.x, y = xGr)) +
  geom_point(aes(color = Position), na.rm = TRUE) +
  geom_text(aes(label = Name), check_overlap = TRUE, nudge_y = -0.07, size = 4) +
  geom_smooth(aes(group = Position, color = Position), method = "glm", se = FALSE) +
  facet_wrap(vars(Position), ncol = 1) +
  # theme_minimal() is a complete theme and replaces every earlier theme()
  # setting, so it must come BEFORE the bold-axis tweaks (it used to follow
  # them, which discarded them).
  theme_minimal() +
  theme(
    axis.text = element_text(face = "bold", size = 10),
    axis.title = element_text(face = "bold")
  ) +
  labs(
    title = "Expected goals by minutes played",
    caption = podpis
  ) +
  xlab("Minutes played") +
  # Axis label typo fixed ("Excpected" -> "Expected").
  ylab("Expected goals factor")
# Horizontal bars for the 20 largest xGr values (ties kept); each bar is
# labelled "<xGr> ( <goals> )".
df %>%
  top_n(n = 20, wt = xGr) %>%
  mutate(Name = fct_reorder(Name, xGr)) %>%
  ggplot(mapping = aes(x = Name, y = xGr, fill = Position)) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste(xGr, "(", goals, ")")),
    size = 3.8,
    nudge_y = 0.3
  ) +
  labs(
    title = "Euro 2020 - Top 20 players by expected goals factor",
    subtitle = "Scored goals in a bracket",
    caption = podpis,
    x = "",
    y = "Expected goals factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()

# Horizontal bars for the 20 largest xGr values among outfield players whose
# `goals` is 0.
df %>%
  filter(Position != "GK", goals == 0) %>%
  top_n(n = 20, wt = xGr) %>%
  mutate(Name = fct_reorder(Name, xGr)) %>%
  ggplot(mapping = aes(x = Name, y = xGr, fill = Position)) +
  geom_col() +
  geom_text(mapping = aes(label = xGr), size = 3.8, nudge_y = 0.10) +
  labs(
    title = "EURO 2020 - Top 20 players by expected goals factor",
    subtitle = "Players with 0 goals scored",
    caption = podpis,
    x = "",
    y = "Expected goals factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()

# Horizontal bars for the 20 smallest xGr values among outfield players with
# at least one goal; ranking on the negated xGr makes top_n() pick the lowest.
df %>%
  filter(Position != "GK", goals > 0) %>%
  top_n(n = 20, wt = -xGr) %>%
  mutate(Name = fct_reorder(Name, desc(xGr))) %>%
  ggplot(mapping = aes(x = Name, y = xGr, fill = Position)) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste(xGr, "(", goals, ")")),
    size = 3.8,
    nudge_y = 0.02
  ) +
  labs(
    title = "EURO 2020 - Top 20 players with the lowest expected goals factor",
    subtitle = "Players with at least 1 goal scored",
    caption = podpis,
    x = "",
    y = "Expected goals factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  coord_flip()

Per minutes played

# Top 20 outfield players by xGr per 90 minutes, i.e. xGr / (minutes / 90).
# Bars are labelled "( <xGr> / <minutes> )".
df %>%
  filter(Position != "GK") %>%
  mutate(
    xg_per_90 = xGr / (mins.played.x / 90),
    Name = fct_reorder(Name, xg_per_90)
  ) %>%
  top_n(n = 20, wt = xg_per_90) %>%
  ggplot(mapping = aes(x = Name, y = xg_per_90, fill = Position)) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste("(", xGr, "/", mins.played.x, ")")),
    size = 3.8,
    nudge_y = 0.28
  ) +
  labs(
    title = "EURO 2020 - Top 20 players with the highest xG per 90 minutes factor",
    subtitle = "All players, (xG/minutes played)",
    caption = podpis,
    x = "",
    y = "Expected goals per 90 minutes played factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  coord_flip()

# Top 20 by xGr per 90 minutes among outfield players with at least one goal.
# Bars are labelled "( <xGr> / <minutes> )".
df %>%
  filter(Position != "GK", goals > 0) %>%
  mutate(
    xg_per_90 = xGr / (mins.played.x / 90),
    Name = fct_reorder(Name, xg_per_90)
  ) %>%
  top_n(n = 20, wt = xg_per_90) %>%
  ggplot(mapping = aes(x = Name, y = xg_per_90, fill = Position)) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste("(", xGr, "/", mins.played.x, ")")),
    size = 3.8,
    nudge_y = 0.28
  ) +
  labs(
    title = "EURO 2020 - Top 20 players with the highest xG per 90 minutes factor",
    subtitle = "Players with at least 1 goal scored",
    caption = podpis,
    x = "",
    y = "Expected goals per 90 minutes played factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  coord_flip()

Teams

# Total xGr per team (rows with missing xGr dropped first), as horizontal
# bars ordered by the total.
df %>%
  filter(!is.na(xGr)) %>%
  group_by(Team) %>%
  summarize(xGr = sum(xGr)) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, xGr)) %>%
  ggplot(mapping = aes(x = Team, y = xGr)) +
  geom_col() +
  geom_text(mapping = aes(label = xGr), size = 4, nudge_y = 0.5) +
  labs(
    title = "Euro 2020 - Expected goals factor by Team",
    caption = podpis,
    x = "",
    y = "Expected goals factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()

# Team xGr divided by team shots (both summed over players with non-missing
# xGr), as horizontal bars ordered by that ratio.
df %>%
  filter(!is.na(xGr)) %>%
  group_by(Team) %>%
  summarize(xGr = sum(xGr), shots = sum(shots)) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, xGr / shots)) %>%
  ggplot(mapping = aes(x = Team, y = xGr / shots)) +
  geom_col() +
  geom_text(
    mapping = aes(label = round(xGr / shots, 3)),
    size = 4,
    nudge_y = 0.004
  ) +
  labs(
    title = "Euro 2020 - Expected goals per shot factor by Team",
    caption = podpis,
    x = "",
    y = "Expected goals per shot factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()

Goals to expected goals

This ratio indicates how effectively each team was able to exploit the chances that it created. Surprisingly, Hungary was the most effective team, scoring more than twice as many goals as expected.

# Team goals divided by team xGr (both summed over players with non-missing
# xGr), as horizontal bars ordered by that ratio.
df %>%
  filter(!is.na(xGr)) %>%
  group_by(Team) %>%
  summarize(xGr = sum(xGr), goals = sum(goals)) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, goals / xGr)) %>%
  ggplot(mapping = aes(x = Team, y = goals / xGr)) +
  geom_col() +
  geom_text(
    mapping = aes(label = round(goals / xGr, 3)),
    size = 4,
    nudge_y = 0.07
  ) +
  labs(
    title = "Euro 2020 - goals scored to expected goals ratio",
    caption = podpis,
    x = "",
    y = "Goals scored to expected goals ratio"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()

Expected goals by minutes played

# Team xGr relative to the team's summed player minutes, scaled by 900, as
# horizontal bars ordered by xGr per minute.
# NOTE(review): the factor 900 presumably stands for 90 minutes x 10 outfield
# players — confirm against how `mins.played.x` is summed here.
df %>%
  filter(!is.na(xGr)) %>%
  group_by(Team) %>%
  summarize(xGr = sum(xGr), mins.played.x = sum(mins.played.x)) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, xGr / mins.played.x)) %>%
  ggplot(mapping = aes(x = Team, y = 900 * xGr / mins.played.x)) +
  geom_col() +
  geom_text(
    mapping = aes(label = round(900 * xGr / mins.played.x, 3)),
    size = 4,
    nudge_y = 0.08
  ) +
  labs(
    title = "Euro 2020 - expected goals per 90 minutes",
    caption = podpis,
    x = "",
    y = "Expected goals per 90 minutes ratio"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()

Correlation and cluster analysis

# Feature table for the forwards' cluster analysis: forwards with at least 90
# minutes and a non-missing xGr, the raw per-player statistics, plus per-minute
# rates derived from them.
dff <- df %>%
  filter(Position == "FW" & mins.played.x >= 90) %>%
  filter(!is.na(xGr)) %>%
  select(
    Name, Position, mins.played.x, shots, shots.on.target, goals, xGr,
    chances.created, assists, xA, passes, completed, tackles.won, int, rec,
    avg.distance, avg.progress, shot.ending.carry, goal.ending.carry,
    end.in.chance, end.in.assist, shot.ending, goal.ending
  ) %>%
  mutate(
    shotspermin = shots / mins.played.x,
    # Fixed copy-paste bug: this rate used `shots` as its numerator, which
    # made it an exact duplicate of `shotspermin`.
    shots.on.targetpermin = shots.on.target / mins.played.x,
    goalspermin = goals / mins.played.x,
    xGrpermin = xGr / mins.played.x,
    chancescreatedpermin = chances.created / mins.played.x,
    assistspermin = assists / mins.played.x,
    xApermin = xA / mins.played.x,
    passespermin = passes / mins.played.x,
    tacklespermin = tackles.won / mins.played.x,
    intpermin = int / mins.played.x,
    recpermin = rec / mins.played.x
  )
# Use the player names as row names, then drop the Name column itself.
row.names(dff) <- dff$Name
dff <- dff[, -1]

# Hierarchical clustering of the feature table, ending in the ggdendro
# plotting data stored back into `dff`.
# Columns 2:33 are used; column 1 (Position, once Name has been dropped) is
# skipped.
dff <- dff[, 2:33] %>%
  scale() %>%                         # centre and scale every column
  dist(method = "euclidean") %>%      # pairwise distances between players
  hclust(method = "ward.D") %>%       # agglomerative clustering
  as.dendrogram() %>%
  dendro_data(type = "rectangle")     # segments and labels for ggplot
# Draw the dendrogram from the ggdendro data in `dff`: tree segments plus the
# leaf labels, flipped so the labels read horizontally.
ggplot(data = segment(dff)) +
  geom_segment(mapping = aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_text(
    data = dff$labels,
    mapping = aes(x = x, y = y, label = label),
    size = 4,
    hjust = 1
  ) +
  scale_y_continuous(expand = c(-1, 100)) +
  labs(
    title = "Euro 2020 - Hierarchical cluster analysis",
    subtitle = "Forwards with at least 90 minutes played",
    caption = podpis,
    x = "",
    y = ""
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  coord_flip()

```