# Script-level logical flag; its consumer is not visible in this section.
sr <- TRUE
library(readxl)
library(dplyr)
library(stringr)
library(forcats)
library(RColorBrewer)
library(ggplot2)
library(ggridges)
library(cluster)
library(ggdendro)
library(wrapr)
The original statistics did not include penalty shots in the xG factor. Here it is assumed that each penalty shot adds 0.65 to the xG factor.
# Boxplot of Caps per Team for every row of `df`. Teams are ordered on the
# x axis by their median Caps and boxes are filled by the `Stage` column.
# `podpis` (defined outside this section) supplies the caption text.
df %>%
  mutate(Team = fct_reorder(Team, Caps, .fun = median)) %>%
  ggplot(mapping = aes(x = Team, y = Caps, fill = Stage)) +
  geom_boxplot() +
  labs(
    title = "EURO 2020 - Boxplot of international caps by team, sort by median",
    subtitle = "All players, as of 2021-06-10",
    caption = podpis,
    x = "",
    y = "Caps"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 13, face = "bold"),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )
# Same Caps-per-Team boxplot, restricted to the 10 rows per team with the
# largest `mins.played.x` (top_n keeps ties). Grouping is dropped before the
# teams are re-ordered by median Caps.
df %>%
  group_by(Team) %>%
  top_n(n = 10, wt = mins.played.x) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, Caps, .fun = median)) %>%
  ggplot(mapping = aes(x = Team, y = Caps, fill = Stage)) +
  geom_boxplot() +
  labs(
    title = "EURO 2020 - Boxplot of international caps by team",
    subtitle = "Top 10 players by minutes played, sort by median, as of 2021-06-10",
    caption = podpis,
    x = "",
    y = "Caps"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 13, face = "bold"),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )
# Boxplot of Caps for each value of `Nr` (treated as a discrete factor).
# Fix: the y axis was labelled 'Height' although the plotted variable is Caps.
# NOTE(review): the title says "sort by median", but the x axis simply follows
# the factor levels of `Nr` - confirm the intended wording.
ggplot(data = df, mapping = aes(x = as.factor(Nr), y = Caps)) +
  geom_boxplot(fill = "grey") +
  labs(
    title = "Euro 2020 - Boxplot of Caps, sort by median",
    subtitle = "All players",
    caption = podpis
  ) +
  xlab("") +
  ylab("Caps") +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", size = 16),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )
# Boxplot of Goals for each value of `Nr` (treated as a discrete factor).
# Fix: the y axis was labelled 'Height' although the plotted variable is Goals.
# NOTE(review): the title says "sort by median", but the x axis simply follows
# the factor levels of `Nr` - confirm the intended wording.
ggplot(data = df, mapping = aes(x = as.factor(Nr), y = Goals)) +
  geom_boxplot(fill = "grey") +
  labs(
    title = "Euro 2020 - Boxplot of Goals, sort by median",
    subtitle = "All players",
    caption = podpis
  ) +
  xlab("") +
  ylab("Goals") +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", size = 16),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )
# Scatter plot of Caps against Birth.date, coloured by Position and labelled
# with player names (overlapping labels are dropped).
# Fix: the filter used `> 1`, which excluded players with exactly one minute
# even though the subtitle promises "at least 1 minute"; it is now `>= 1`.
df %>%
  # keep players with at least 1 minute in either minutes column
  filter(mins.played.x >= 1 | mins.played.y >= 1) %>%
  ggplot(aes(x = Birth.date, y = Caps)) +
  geom_point(aes(color = Position)) +
  scale_color_brewer(palette = "Set1") +
  geom_text(aes(label = Name), check_overlap = TRUE, nudge_y = -2.5, size = 3) +
  labs(
    title = "EURO 2020 - Caps by Birth Date",
    subtitle = "Players with at least 1 minute played, as of 2021-06-10",
    caption = podpis
  ) +
  xlab("Birth date") +
  ylab("Caps") +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", hjust = 1, size = 12),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )
# Caps against Birth.date with one loess trend line per Position.
# Fixes:
#  * the filter used `> 1` although the subtitle says "at least 1 minute";
#  * theme_minimal() was added AFTER theme(); a complete theme replaces all
#    earlier theme settings, so the bold axis styling was silently discarded.
#    The complete theme is now applied first and customised afterwards.
df %>%
  # keep players with at least 1 minute in either minutes column
  filter(mins.played.x >= 1 | mins.played.y >= 1) %>%
  ggplot(aes(x = Birth.date, y = Caps)) +
  geom_point(aes(color = Position)) +
  geom_smooth(
    aes(group = Position, color = Position),
    method = "loess", se = FALSE, size = 1
  ) +
  geom_text(
    aes(label = Name, color = Position),
    check_overlap = TRUE, nudge_y = -2.0, size = 3
  ) +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "Caps by birth date",
    subtitle = "Player with at least 1 minute played",
    caption = podpis
  ) +
  xlab("Birth date") +
  ylab("Caps") +
  theme_minimal() +
  theme(
    axis.text = element_text(face = "bold", size = 10),
    axis.title = element_text(face = "bold")
  )
## `geom_smooth()` using formula 'y ~ x'
# Goals against Caps for all non-goalkeeper rows, with a straight-line
# (glm, default family) fit per Position drawn in the Position colour.
df %>%
  filter(Position != "GK") %>% # exclude goalkeepers
  ggplot(mapping = aes(x = Caps, y = Goals)) +
  geom_point(mapping = aes(color = Position)) +
  geom_smooth(
    mapping = aes(group = Position, color = Position),
    method = "glm",
    se = FALSE,
    size = 1
  ) +
  geom_text(
    mapping = aes(label = Name, color = Position),
    check_overlap = TRUE,
    nudge_y = -1.0,
    size = 3
  ) +
  scale_color_brewer(palette = "Set1") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 10, face = "bold"),
    axis.title = element_text(face = "bold")
  ) +
  labs(
    title = "International goals vs. caps",
    subtitle = "No goalkeepers",
    caption = podpis
  )
## `geom_smooth()` using formula 'y ~ x'
# Goals against Caps for non-goalkeepers, one stacked panel per Position,
# each with its own glm line and name labels.
df %>%
  filter(Position != "GK") %>% # exclude goalkeepers
  ggplot(mapping = aes(x = Caps, y = Goals)) +
  geom_point(mapping = aes(color = Position)) +
  facet_wrap(vars(Position), ncol = 1) +
  geom_smooth(
    mapping = aes(group = Position, color = Position),
    method = "glm",
    se = FALSE,
    size = 1
  ) +
  geom_text(
    mapping = aes(label = Name, color = Position),
    check_overlap = TRUE,
    nudge_y = -1.0,
    size = 3
  ) +
  scale_color_brewer(palette = "Set1") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 10, face = "bold"),
    axis.title = element_text(face = "bold")
  ) +
  labs(
    title = "International goals vs. caps",
    subtitle = "No goalkeepers",
    caption = podpis
  )
## `geom_smooth()` using formula 'y ~ x'
# Linear fit (glm with the default family) of Goals on Caps for forwards;
# its coefficients are printed on the plot below.
FWGoalsCaps <- glm(Goals ~ Caps, data = df[df$Position == "FW", ])

# Goals vs. caps for forwards, drawn in a single fixed colour with the fitted
# line and its equation.
# Fixes:
#  * the equation label printed the intercept without a sign, so a positive
#    intercept read as "... * Caps 0.5"; sprintf's "+" flag now always emits
#    the sign;
#  * colour aesthetics that were immediately overridden by the constant
#    colour have been dropped (no visual change).
df %>%
  filter(Position == "FW") %>% # forwards
  ggplot(aes(x = Caps, y = Goals)) +
  geom_point(color = "skyblue4", show.legend = FALSE) +
  geom_smooth(
    aes(group = Position),
    color = "skyblue4", method = "glm", se = FALSE, size = 1,
    show.legend = FALSE
  ) +
  geom_text(
    aes(label = Name),
    color = "skyblue4", check_overlap = TRUE, nudge_y = -1.0, size = 4,
    show.legend = FALSE
  ) +
  labs(
    title = "EURO2020 - International goals vs. caps",
    subtitle = "Forwards, as of 2021-06-10",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", hjust = 1, size = 13),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  annotate(
    geom = "text", x = 30, y = 75,
    label = sprintf(
      "Goals = %.3f * Caps %+.3f",
      coef(FWGoalsCaps)[["Caps"]],
      coef(FWGoalsCaps)[["(Intercept)"]]
    ),
    color = "skyblue4", size = 7
  )
## `geom_smooth()` using formula 'y ~ x'
# Linear fit (glm with the default family) of Goals on Caps for midfielders;
# its coefficients are printed on the plot below.
MFGoalsCaps <- glm(Goals ~ Caps, data = df[df$Position == "MF", ])

# Goals vs. caps for midfielders with the fitted line and its equation.
# Fix: the equation label printed the intercept without a sign, so a positive
# intercept read as "... * Caps 0.5"; sprintf's "+" flag now always emits it.
df %>%
  filter(Position == "MF") %>% # midfielders
  ggplot(aes(x = Caps, y = Goals)) +
  geom_point(color = "forestgreen", show.legend = FALSE) +
  geom_smooth(
    aes(group = Position),
    color = "forestgreen", method = "glm", se = FALSE, size = 1,
    show.legend = FALSE
  ) +
  geom_text(
    aes(label = Name),
    color = "forestgreen", check_overlap = TRUE, nudge_y = -0.45, size = 4,
    show.legend = FALSE
  ) +
  labs(
    title = "EURO2020 - International goals vs. caps",
    subtitle = "Midfielders, as of 2021-06-10",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", hjust = 1, size = 13),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  annotate(
    geom = "text", x = 30, y = 25,
    label = sprintf(
      "Goals = %.3f * Caps %+.3f",
      coef(MFGoalsCaps)[["Caps"]],
      coef(MFGoalsCaps)[["(Intercept)"]]
    ),
    color = "forestgreen", size = 7
  )
## `geom_smooth()` using formula 'y ~ x'
# Linear fit (glm with the default family) of Goals on Caps for defenders;
# its coefficients are printed on the plot below.
DFGoalsCaps <- glm(Goals ~ Caps, data = df[df$Position == "DF", ])

# Goals vs. caps for defenders with the fitted line and its equation.
# Fix: the equation label printed the intercept without a sign, so a positive
# intercept read as "... * Caps 0.5"; sprintf's "+" flag now always emits it.
df %>%
  filter(Position == "DF") %>% # defenders
  ggplot(aes(x = Caps, y = Goals)) +
  geom_point(color = "red", show.legend = FALSE) +
  geom_smooth(
    aes(group = Position),
    color = "red", method = "glm", se = FALSE, size = 1,
    show.legend = FALSE
  ) +
  geom_text(
    aes(label = Name),
    color = "red", check_overlap = TRUE, nudge_y = -0.3, size = 4,
    show.legend = FALSE
  ) +
  labs(
    title = "EURO2020 - International goals vs. caps",
    subtitle = "Defenders, as of 2021-06-10",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 16),
    axis.title.x = element_text(face = "bold", size = 16),
    axis.text.x = element_text(face = "bold", hjust = 1, size = 13),
    axis.text.y = element_text(face = "bold", size = 13),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  annotate(
    geom = "text", x = 30, y = 10,
    label = sprintf(
      "Goals = %.3f * Caps %+.3f",
      coef(DFGoalsCaps)[["Caps"]],
      coef(DFGoalsCaps)[["(Intercept)"]]
    ),
    color = "red", size = 7
  )
## `geom_smooth()` using formula 'y ~ x'
# Boxplot of Height per Team for every row of `df`; teams are ordered by
# median Height and boxes are filled by the `Stage` column.
df %>%
  mutate(Team = fct_reorder(Team, Height, .fun = median)) %>%
  ggplot(mapping = aes(x = Team, y = Height, fill = Stage)) +
  geom_boxplot() +
  labs(
    title = "Euro 2020 - Boxplot of players height by team, sort by median",
    subtitle = "All players",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 13, face = "bold"),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )
# Height-per-Team boxplot limited to the 10 rows per team with the largest
# `mins.played.x` (ties kept); teams ordered by median Height afterwards.
df %>%
  group_by(Team) %>%
  top_n(n = 10, wt = mins.played.x) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, Height, .fun = median)) %>%
  ggplot(mapping = aes(x = Team, y = Height, fill = Stage)) +
  geom_boxplot() +
  labs(
    title = "Boxplot of players height by team",
    subtitle = "Top 10 players by minutes played, sort by median",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 13, face = "bold"),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )
# Boxplot of Height for each value of `Nr` (treated as a discrete factor).
# NOTE(review): the title says "sort by median", but the x axis follows the
# factor levels of `Nr` - confirm the intended wording.
df %>%
  ggplot(mapping = aes(x = as.factor(Nr), y = Height)) +
  geom_boxplot(fill = "grey") +
  labs(
    title = "Euro 2020 - Boxplot of players height, sort by median",
    subtitle = "All players",
    caption = podpis,
    x = "",
    y = "Height"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 16, face = "bold"),
    axis.text.y = element_text(size = 13, face = "bold"),
    legend.key.size = unit(0.9, "cm"),
    legend.key.height = unit(0.9, "cm"),
    legend.key.width = unit(0.9, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )
# Overlaid, semi-transparent kernel density curves of Height, one per
# Position (the title calls it a histogram, but geom_density is drawn).
ggplot(data = df) +
  geom_density(
    mapping = aes(Height, fill = Position, color = Position),
    size = 1,
    alpha = 0.3
  ) +
  labs(
    title = "Euro 2020 - Players height histogram by Position",
    subtitle = "All players",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 16, face = "bold"),
    axis.title.x = element_text(size = 16, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", hjust = 1),
    axis.text.y = element_text(size = 13, face = "bold"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 16),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  )
# Ridgeline plot of the Height distribution, one ridge per Position,
# without a legend.
ggplot(data = df) +
  geom_density_ridges(
    mapping = aes(x = Height, y = Position, fill = Position),
    scale = 2,
    show.legend = FALSE
  ) +
  theme_ridges()
## Picking joint bandwidth of 1.84
# Bar chart of the 20 largest Height values (ties kept), bars ordered from
# tallest to shortest and labelled with the height value.
df %>%
  top_n(n = 20, wt = Height) %>%
  mutate(Name = fct_reorder(Name, desc(Height))) %>%
  ggplot(mapping = aes(x = Name, y = Height, fill = Position)) +
  geom_col() +
  labs(
    title = "Highest players",
    caption = podpis,
    x = "",
    y = "Height (cm)"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 15, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 18, face = "bold")
  ) +
  geom_text(mapping = aes(label = Height), size = 4, nudge_y = 3.5)
# Ten tallest players (ties kept) within each Position, one facet per
# Position, bars filled by Team.
# Fixes:
#  * Position was re-levelled inside a grouped mutate() while also being the
#    grouping variable. dplyr refuses to modify a grouping column, and a
#    per-group reorder could not order the facets anyway. The reorder now
#    happens on the ungrouped data, before group_by().
#  * the data is ungrouped again before plotting;
#  * title and caption added to match the "Shortest players by Position" plot.
df %>%
  mutate(Position = fct_reorder(Position, desc(Height))) %>%
  group_by(Position) %>%
  top_n(n = 10, wt = Height) %>%
  ungroup() %>%
  ggplot(aes(x = Name, y = Height, fill = Team)) +
  geom_bar(stat = "identity") +
  facet_wrap(vars(Position), ncol = 1, scales = "free") +
  labs(
    title = "Highest players by Position",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, face = "bold", hjust = 1, size = 12),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(size = 18)
  ) +
  geom_text(aes(label = Height), size = 4, nudge_y = 3.5)
# Bar chart of the 20 smallest Height values (negative n selects the bottom
# rows, ties kept), bars ordered from shortest upwards.
df %>%
  top_n(n = -20, wt = Height) %>%
  mutate(Name = fct_reorder(Name, Height)) %>%
  ggplot(mapping = aes(x = Name, y = Height, fill = Position)) +
  geom_col() +
  labs(
    title = "Shortest players",
    caption = podpis,
    x = "",
    y = "Height (cm)"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(size = 15, face = "bold"),
    axis.text.x = element_text(size = 12, face = "bold", angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 18, face = "bold")
  ) +
  geom_text(mapping = aes(label = Height), size = 4, nudge_y = 3.5)
# Ten shortest players (ties kept) within each Position, one facet per
# Position, bars filled by Team.
# Fixes:
#  * Position was re-levelled inside a grouped mutate() while also being the
#    grouping variable. dplyr refuses to modify a grouping column, and a
#    per-group reorder could not order the facets anyway. The reorder now
#    happens on the ungrouped data, before group_by().
#  * the data is ungrouped again before plotting.
df %>%
  mutate(Position = fct_reorder(Position, Height)) %>%
  group_by(Position) %>%
  top_n(n = -10, wt = Height) %>%
  ungroup() %>%
  ggplot(aes(x = Name, y = Height, fill = Team)) +
  geom_bar(stat = "identity") +
  facet_wrap(vars(Position), ncol = 1, scales = "free") +
  labs(
    title = "Shortest players by Position",
    caption = podpis
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, face = "bold", hjust = 1, size = 12),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(size = 18)
  ) +
  geom_text(aes(label = Height), size = 4, nudge_y = 4)
# Per-position linear fits (glm, default family) of xGr on minutes played.
# For each fit the intercept (suffix 1) and the slope (suffix 2) are stored
# as separate named numerics.

# Defenders
glmxGDF <- glm(xGr ~ mins.played.x, data = df[df$Position == "DF", ])
glmxGDF1 <- coef(glmxGDF)[1]
glmxGDF2 <- coef(glmxGDF)[2]

# Midfielders
glmxGMF <- glm(xGr ~ mins.played.x, data = df[df$Position == "MF", ])
glmxGMF1 <- coef(glmxGMF)[1]
glmxGMF2 <- coef(glmxGMF)[2]

# Forwards
glmxGFW <- glm(xGr ~ mins.played.x, data = df[df$Position == "FW", ])
glmxGFW1 <- coef(glmxGFW)[1]
glmxGFW2 <- coef(glmxGFW)[2]
# xGr against minutes played for non-goalkeepers, with a glm line per
# Position and non-overlapping name labels. xGr is coerced to numeric first.
df %>%
  filter(Position != "GK") %>%
  mutate(xGr = as.numeric(xGr)) %>%
  ggplot(mapping = aes(x = mins.played.x, y = xGr)) +
  geom_point(mapping = aes(color = Position), na.rm = TRUE) +
  geom_text(
    mapping = aes(label = Name),
    check_overlap = TRUE,
    nudge_y = -0.05,
    size = 3.5
  ) +
  geom_smooth(
    mapping = aes(group = Position, color = Position),
    method = "glm",
    se = FALSE
  ) +
  labs(
    title = "Euro 2020 - Expected goals by minutes played",
    subtitle = "All players",
    caption = podpis,
    x = "Minutes played",
    y = "Expected goals factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 12, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 18)
  )
# xGr against minutes played for non-goalkeepers with more than 0 minutes,
# one stacked panel per Position, each with its own glm line.
# Fixes:
#  * theme_minimal() was added AFTER theme(); a complete theme replaces all
#    earlier theme settings, so the bold axis styling was silently discarded.
#    The complete theme is now applied first and customised afterwards.
#  * y-axis label typo "Excpected" corrected.
df %>%
  filter(Position != "GK" & mins.played.x > 0) %>%
  mutate(xGr = as.numeric(xGr)) %>%
  ggplot(aes(x = mins.played.x, y = xGr)) +
  geom_point(aes(color = Position), na.rm = TRUE) +
  geom_text(aes(label = Name), check_overlap = TRUE, nudge_y = -0.07, size = 4) +
  geom_smooth(aes(group = Position, color = Position), method = "glm", se = FALSE) +
  facet_wrap(vars(Position), ncol = 1) +
  theme_minimal() +
  theme(
    axis.text = element_text(face = "bold", size = 10),
    axis.title = element_text(face = "bold")
  ) +
  labs(
    title = "Expected goals by minutes played",
    caption = podpis
  ) +
  xlab("Minutes played") +
  ylab("Expected goals factor")
# Horizontal bar chart of the 20 largest xGr values (ties kept). Each bar is
# labelled "xGr ( goals )".
df %>%
  top_n(n = 20, wt = xGr) %>%
  mutate(Name = fct_reorder(Name, xGr)) %>%
  ggplot(mapping = aes(x = Name, y = xGr, fill = Position)) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste(xGr, "(", goals, ")")),
    size = 3.8,
    nudge_y = 0.3
  ) +
  labs(
    title = "Euro 2020 - Top 20 players by expected goals factor",
    subtitle = "Scored goals in a bracket",
    caption = podpis,
    x = "",
    y = "Expected goals factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 12, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()
# Horizontal bar chart of the 20 largest xGr values among non-goalkeepers
# whose `goals` column equals 0; bars are labelled with xGr.
df %>%
  filter(Position != "GK", goals == 0) %>%
  top_n(n = 20, wt = xGr) %>%
  mutate(Name = fct_reorder(Name, xGr)) %>%
  ggplot(mapping = aes(x = Name, y = xGr, fill = Position)) +
  geom_col() +
  geom_text(mapping = aes(label = xGr), size = 3.8, nudge_y = 0.10) +
  labs(
    title = "EURO 2020 - Top 20 players by expected goals factor",
    subtitle = "Players with 0 goals scored",
    caption = podpis,
    x = "",
    y = "Expected goals factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 12, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()
# Horizontal bar chart of the 20 smallest xGr values (selected by weighting
# on -xGr, ties kept) among non-goalkeepers with goals > 0. Each bar is
# labelled "xGr ( goals )".
df %>%
  filter(Position != "GK", goals > 0) %>%
  top_n(n = 20, wt = -xGr) %>%
  mutate(Name = fct_reorder(Name, desc(xGr))) %>%
  ggplot(mapping = aes(x = Name, y = xGr, fill = Position)) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste(xGr, "(", goals, ")")),
    size = 3.8,
    nudge_y = 0.02
  ) +
  labs(
    title = "EURO 2020 - Top 20 players with the lowest expected goals factor",
    subtitle = "Players with at least 1 goal scored",
    caption = podpis,
    x = "",
    y = "Expected goals factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 12, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  coord_flip()
# Horizontal bar chart of the 20 largest values of xGr per 90 minutes
# (xGr divided by mins.played.x / 90) among non-goalkeepers. Each bar is
# labelled "( xGr / minutes )".
# NOTE(review): rows with mins.played.x == 0 are not filtered out here, so the
# ratio can be non-finite for them - confirm this is acceptable.
df %>%
  filter(Position != "GK") %>%
  mutate(
    xGrperminute = xGr / (mins.played.x / 90),
    Name = fct_reorder(Name, xGrperminute)
  ) %>%
  top_n(n = 20, wt = xGrperminute) %>%
  ggplot(mapping = aes(x = Name, y = xGrperminute, fill = Position)) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste("(", xGr, "/", mins.played.x, ")")),
    size = 3.8,
    nudge_y = 0.28
  ) +
  labs(
    title = "EURO 2020 - Top 20 players with the highest xG per 90 minutes factor",
    subtitle = "All players, (xG/minutes played)",
    caption = podpis,
    x = "",
    y = "Expected goals per 90 minutes played factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 12, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  coord_flip()
# Horizontal bar chart of the 20 largest values of xGr per 90 minutes
# (xGr divided by mins.played.x / 90) among non-goalkeepers with goals > 0.
# Each bar is labelled "( xGr / minutes )".
df %>%
  filter(Position != "GK", goals > 0) %>%
  mutate(
    xGrperminute = xGr / (mins.played.x / 90),
    Name = fct_reorder(Name, xGrperminute)
  ) %>%
  top_n(n = 20, wt = xGrperminute) %>%
  ggplot(mapping = aes(x = Name, y = xGrperminute, fill = Position)) +
  geom_col() +
  geom_text(
    mapping = aes(label = paste("(", xGr, "/", mins.played.x, ")")),
    size = 3.8,
    nudge_y = 0.28
  ) +
  labs(
    title = "EURO 2020 - Top 20 players with the highest xG per 90 minutes factor",
    subtitle = "Players with at least 1 goal scored",
    caption = podpis,
    x = "",
    y = "Expected goals per 90 minutes played factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 12, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  coord_flip()
# Horizontal bar chart of xGr summed per Team (rows with missing xGr are
# dropped first); teams ordered by the total and labelled with it.
df %>%
  filter(!is.na(xGr)) %>%
  group_by(Team) %>%
  summarise(xGr = sum(xGr)) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, xGr)) %>%
  ggplot(mapping = aes(x = Team, y = xGr)) +
  geom_col() +
  geom_text(mapping = aes(label = xGr), size = 4, nudge_y = 0.5) +
  labs(
    title = "Euro 2020 - Expected goals factor by Team",
    caption = podpis,
    x = "",
    y = "Expected goals factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 12, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()
# Horizontal bar chart of team xGr divided by team shots (both summed over
# rows with non-missing xGr); labels show the ratio rounded to 3 decimals.
df %>%
  filter(!is.na(xGr)) %>%
  group_by(Team) %>%
  summarise(xGr = sum(xGr), shots = sum(shots)) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, xGr / shots)) %>%
  ggplot(mapping = aes(x = Team, y = xGr / shots)) +
  geom_col() +
  geom_text(
    mapping = aes(label = round(xGr / shots, 3)),
    size = 4,
    nudge_y = 0.004
  ) +
  labs(
    title = "Euro 2020 - Expected goals per shot factor by Team",
    caption = podpis,
    x = "",
    y = "Expected goals per shot factor"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 12, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()
This ratio indicates how effectively each team exploited the chances it created. Surprisingly, Hungary was the most effective team, scoring more than twice as many goals as expected.
# Horizontal bar chart of team goals divided by team xGr (both summed over
# rows with non-missing xGr); labels show the ratio rounded to 3 decimals.
df %>%
  filter(!is.na(xGr)) %>%
  group_by(Team) %>%
  summarise(xGr = sum(xGr), goals = sum(goals)) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, goals / xGr)) %>%
  ggplot(mapping = aes(x = Team, y = goals / xGr)) +
  geom_col() +
  geom_text(
    mapping = aes(label = round(goals / xGr, 3)),
    size = 4,
    nudge_y = 0.07
  ) +
  labs(
    title = "Euro 2020 - goals scored to expected goals ratio",
    caption = podpis,
    x = "",
    y = "Goals scored to expected goals ratio"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 12, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()
# Horizontal bar chart of 900 * team xGr / team minutes, where both sums run
# over rows with non-missing xGr; labels show the value rounded to 3 decimals.
# NOTE(review): the factor 900 presumably stands for 90 minutes x 10 players
# contributing minutes - confirm against how mins.played.x is populated.
df %>%
  filter(!is.na(xGr)) %>%
  group_by(Team) %>%
  summarise(xGr = sum(xGr), mins.played.x = sum(mins.played.x)) %>%
  ungroup() %>%
  mutate(Team = fct_reorder(Team, xGr / mins.played.x)) %>%
  ggplot(mapping = aes(x = Team, y = 900 * xGr / mins.played.x)) +
  geom_col() +
  geom_text(
    mapping = aes(label = round(900 * xGr / mins.played.x, 3)),
    size = 4,
    nudge_y = 0.08
  ) +
  labs(
    title = "Euro 2020 - expected goals per 90 minutes",
    caption = podpis,
    x = "",
    y = "Expected goals per 90 minutes ratio"
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 12, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(face = "bold", size = 18)
  ) +
  coord_flip()
# Build the hierarchical-clustering input for forwards with at least 90
# minutes played and non-missing xGr, then reduce it step by step to a
# ggdendro data object. `dff` is reused for every stage and ends up holding
# the dendro_data() result consumed by the dendrogram plot.
#
# Fixes:
#  * `shots.on.targetpermin` was computed from `shots` (copy-paste slip), which
#    duplicated `shotspermin`; it now uses `shots.on.target`.
#  * The numeric feature columns are selected by excluding `Name` and
#    `Position` by name instead of the positional range 2:33, so the selection
#    no longer silently breaks if a column is added or removed.
dff <- df %>%
  filter(Position == "FW" & mins.played.x >= 90) %>%
  filter(!is.na(xGr)) %>%
  select(
    Name, Position, mins.played.x, shots, shots.on.target, goals, xGr,
    chances.created, assists, xA, passes, completed, tackles.won, int, rec,
    avg.distance, avg.progress, shot.ending.carry, goal.ending.carry,
    end.in.chance, end.in.assist, shot.ending, goal.ending
  ) %>%
  # per-minute rates so that playing time does not dominate the distances
  mutate(
    shotspermin = shots / mins.played.x,
    shots.on.targetpermin = shots.on.target / mins.played.x,
    goalspermin = goals / mins.played.x,
    xGrpermin = xGr / mins.played.x,
    chancescreatedpermin = chances.created / mins.played.x,
    assistspermin = assists / mins.played.x,
    xApermin = xA / mins.played.x,
    passespermin = passes / mins.played.x,
    tacklespermin = tackles.won / mins.played.x,
    intpermin = int / mins.played.x,
    recpermin = rec / mins.played.x
  )

# Player names become row names so they carry through as dendrogram labels.
row.names(dff) <- dff$Name

# Keep only the feature columns (everything except Name and Position).
dff <- dff[, setdiff(names(dff), c("Name", "Position"))]

dff <- scale(dff)                             # centre and scale each column
dff <- dist(dff, method = "euclidean")        # pairwise player distances
dff <- hclust(dff, method = "ward.D")         # Ward agglomerative clustering
dff <- as.dendrogram(dff)
dff <- dendro_data(dff, type = "rectangle")   # segments + labels for ggplot
# Draw the dendrogram held in `dff` (a dendro_data object): tree segments
# plus leaf labels, flipped so the leaves run along the vertical axis.
ggplot(data = segment(dff)) +
  geom_segment(mapping = aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_text(
    data = dff$labels,
    mapping = aes(x = x, y = y, label = label),
    size = 4,
    hjust = 1
  ) +
  scale_y_continuous(expand = c(-1, 100)) +
  labs(
    title = "Euro 2020 - Hierarchical cluster analysis",
    subtitle = "Forwards with at least 90 minutes played",
    caption = podpis,
    x = "",
    y = ""
  ) +
  theme_minimal() +
  theme(
    axis.title.y = element_text(face = "bold", size = 12),
    axis.title.x = element_text(face = "bold", size = 12),
    axis.text.x = element_text(face = "bold", size = 12, hjust = 1),
    axis.text.y = element_text(face = "bold", size = 12),
    plot.title = element_text(size = 18),
    plot.subtitle = element_text(size = 13),
    plot.caption = element_text(size = 12)
  ) +
  coord_flip()
```