This is a two-part series analyzing the recently released player data from EAFC 24. The first part will undertake an exploratory data analysis of the female-only data. The second part will focus on building a linear regression model to predict player rating.
To perform the EDA we need to install a range of different R packages to help manipulate and visualise the data. If you have not already installed the required packages, then delete the # to install them.
To make importing the data as seamless as possible, it is recommended to: 1. Create a new directory. 2. Create a “Data” folder in the new directory. 3. Save the data in the “Data” folder. 4. Follow the steps below.
The data can be found on Kaggle through this link: https://www.kaggle.com/datasets/nyagami/fc-24-players-database-and-stats-from-easports/code
# Load the female player data and name it "eafc".
# na.strings lists the field values that are read as NA: the empty string is
# included so blank text fields become missing values as well. The default
# ("NA" only) would leave blank text fields as empty strings.
eafc <- read.csv("Data/female_players.csv", na.strings = c("", "NA"))
# Use head() to see the first 6 rows of data.
head(eafc)
## X Name Nation Club Position Age Overall Pace
## 1 0 Alexia Putellas Spain FC Barcelona CM 29 91 82
## 2 1 Aitana Bonmatí Spain FC Barcelona CM 25 90 81
## 3 2 Sam Kerr Australia Chelsea ST 30 90 85
## 4 3 Caroline Graham Hansen Norway FC Barcelona RW 28 90 89
## 5 4 Kadidiatou Diani France OL RW 28 89 89
## 6 5 Mapi León Spain FC Barcelona CB 28 89 75
## Shooting Passing Dribbling Defending Physicality Acceleration Sprint
## 1 90 91 92 72 78 81 82
## 2 84 83 91 75 73 82 80
## 3 88 74 90 42 85 86 84
## 4 86 88 93 47 75 90 88
## 5 85 83 88 56 77 88 90
## 6 74 81 68 90 81 73 77
## Positioning Finishing Shot Long Volleys Penalties Vision Crossing Free Curve
## 1 91 91 94 91 90 91 91 88 89 89
## 2 91 91 91 87 62 70 90 67 67 76
## 3 92 92 78 67 92 71 79 70 68 76
## 4 87 86 89 84 83 71 89 90 77 90
## 5 91 90 87 74 74 71 85 84 70 74
## 6 70 65 88 90 49 64 56 86 88 87
## Agility Balance Reactions Ball Composure Interceptions Heading Def Standing
## 1 90 89 92 94 92 78 74 60 81
## 2 93 86 91 91 85 88 54 70 81
## 3 90 82 89 91 91 24 93 44 39
## 4 94 75 83 90 84 35 71 45 50
## 5 89 84 87 90 84 55 81 46 59
## 6 70 86 90 82 91 90 83 91 91
## Sliding Jumping Stamina Strength Aggression Att.work.rate Def.work.rate
## 1 64 84 85 78 70 High Medium
## 2 67 75 80 75 60 High High
## 3 30 95 87 88 70 High High
## 4 46 84 84 74 66 High Medium
## 5 49 86 81 84 52 High High
## 6 90 87 75 83 84 High High
## Preferred.foot Weak.foot Skill.moves
## 1 Left 5 5
## 2 Right 5 4
## 3 Right 4 4
## 4 Right 4 5
## 5 Right 4 4
## 6 Left 3 2
## URL
## 1 https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/alexia-putellas/227203
## 2 https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/aitana-bonmati/241667
## 3 https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/sam-kerr/227125
## 4 https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/caroline-graham-hansen/227102
## 5 https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/kadidiatou-diani/227361
## 6 https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/mapi-leon/236479
## Gender GK
## 1 FALSE NA
## 2 FALSE NA
## 3 FALSE NA
## 4 FALSE NA
## 5 FALSE NA
## 6 FALSE NA
# Count the missing (NA) entries in every column of "eafc":
# is.na() flags each cell, colSums() adds the flags up per column.
eafc %>%
  is.na() %>%
  colSums()
## X Name Nation Club Position
## 0 0 0 0 0
## Age Overall Pace Shooting Passing
## 0 0 0 0 0
## Dribbling Defending Physicality Acceleration Sprint
## 0 0 0 0 0
## Positioning Finishing Shot Long Volleys
## 0 0 0 0 0
## Penalties Vision Crossing Free Curve
## 0 0 0 0 0
## Agility Balance Reactions Ball Composure
## 0 0 0 0 0
## Interceptions Heading Def Standing Sliding
## 0 0 0 0 0
## Jumping Stamina Strength Aggression Att.work.rate
## 0 0 0 0 0
## Def.work.rate Preferred.foot Weak.foot Skill.moves URL
## 0 0 0 0 0
## Gender GK
## 0 1316
The missing data relates to whether the player is a GK or not. Only goalkeepers have a rating in this column. Otherwise there are no other missing values.
# Build the outfield data set: goalkeepers are dropped, and so are the "GK"
# column (the only column holding missing values) and the "URL" column.
eafc_outfield <- eafc %>%
  filter(Position != "GK") %>%
  select(-GK, -URL)
# Inspect the structure of the result.
str(eafc_outfield)
## 'data.frame': 1316 obs. of 45 variables:
## $ X : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Name : chr "Alexia Putellas" "Aitana Bonmatí" "Sam Kerr" "Caroline Graham Hansen" ...
## $ Nation : chr "Spain" "Spain" "Australia" "Norway" ...
## $ Club : chr "FC Barcelona" "FC Barcelona" "Chelsea" "FC Barcelona" ...
## $ Position : chr "CM" "CM" "ST" "RW" ...
## $ Age : int 29 25 30 28 28 28 34 28 32 33 ...
## $ Overall : int 91 90 90 90 89 89 89 89 88 88 ...
## $ Pace : int 82 81 85 89 89 75 83 75 69 53 ...
## $ Shooting : int 90 84 88 86 85 74 89 89 51 67 ...
## $ Passing : int 91 83 74 88 83 81 80 77 67 69 ...
## $ Dribbling : int 92 91 90 93 88 68 89 90 57 69 ...
## $ Defending : int 72 75 42 47 56 90 47 39 89 91 ...
## $ Physicality : int 78 73 85 75 77 81 77 76 80 84 ...
## $ Acceleration : int 81 82 86 90 88 73 82 73 65 53 ...
## $ Sprint : int 82 80 84 88 90 77 84 77 72 53 ...
## $ Positioning : int 91 91 92 87 91 70 90 93 39 70 ...
## $ Finishing : int 91 91 92 86 90 65 94 91 51 65 ...
## $ Shot : int 94 91 78 89 87 88 89 83 85 80 ...
## $ Long : int 91 87 67 84 74 90 82 72 68 84 ...
## $ Volleys : int 90 62 92 83 74 49 83 90 60 55 ...
## $ Penalties : int 91 70 71 71 71 64 88 84 51 67 ...
## $ Vision : int 91 90 79 89 85 56 75 76 51 59 ...
## $ Crossing : int 88 67 70 90 84 86 71 71 59 53 ...
## $ Free : int 89 67 68 77 70 88 73 75 40 58 ...
## $ Curve : int 89 76 76 90 74 87 68 79 69 65 ...
## $ Agility : int 90 93 90 94 89 70 83 74 66 53 ...
## $ Balance : int 89 86 82 75 84 86 67 48 65 34 ...
## $ Reactions : int 92 91 89 83 87 90 82 90 91 87 ...
## $ Ball : int 94 91 91 90 90 82 92 90 79 75 ...
## $ Composure : int 92 85 91 84 84 91 91 86 75 84 ...
## $ Interceptions : int 78 88 24 35 55 90 46 23 89 89 ...
## $ Heading : int 74 54 93 71 81 83 84 92 85 94 ...
## $ Def : int 60 70 44 45 46 91 41 42 92 92 ...
## $ Standing : int 81 81 39 50 59 91 43 33 87 92 ...
## $ Sliding : int 64 67 30 46 49 90 39 30 88 81 ...
## $ Jumping : int 84 75 95 84 86 87 90 89 79 81 ...
## $ Stamina : int 85 80 87 84 81 75 87 75 72 72 ...
## $ Strength : int 78 75 88 74 84 83 81 80 82 91 ...
## $ Aggression : int 70 60 70 66 52 84 53 65 87 84 ...
## $ Att.work.rate : chr "High" "High" "High" "High" ...
## $ Def.work.rate : chr "Medium" "High" "High" "Medium" ...
## $ Preferred.foot: chr "Left" "Right" "Right" "Right" ...
## $ Weak.foot : int 5 5 4 4 4 3 5 4 3 3 ...
## $ Skill.moves : int 5 4 4 5 4 2 4 4 2 2 ...
## $ Gender : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
# Count how many columns of "eafc_outfield" have each data type.
# vapply() always returns a character vector (sapply() can silently return a
# list); only the first class is kept so that a column carrying several
# classes still contributes exactly one label.
datatype_count <- vapply(
  eafc_outfield,
  function(column) class(column)[1],
  character(1)
)
print(table(datatype_count))
## datatype_count
## character integer logical
## 7 37 1
# Tabulate the categorical variables: the character columns other than Name,
# plus the discrete Weak.foot and Skill.moves ratings and the logical Gender
# column. lapply(table) produces one frequency table per selected column.
cat_counts <- eafc_outfield %>%
  select(c(
    "Nation", "Club", "Position", "Att.work.rate", "Def.work.rate",
    "Preferred.foot", "Weak.foot", "Skill.moves", "Gender"
  )) %>%
  lapply(table)

# Print every table under a heading. seq_along() is used instead of
# 1:length() so an empty list results in zero iterations.
for (i in seq_along(cat_counts)) {
  cat("Counts for", names(cat_counts)[i], ":\n")
  print(cat_counts[[i]])
  cat("\n")
}
## Counts for Nation :
##
## Algeria Andorra Argentina
## 3 1 5
## Australia Austria Belarus
## 13 18 1
## Belgium Bermuda Bosnia and Herzegovina
## 7 1 1
## Brazil Cameroon Canada
## 25 6 26
## Chile China PR Colombia
## 4 1 7
## Costa Rica Côte d'Ivoire Curaçao
## 1 4 1
## Czech Republic Denmark Ecuador
## 13 22 1
## England Finland France
## 94 6 151
## Germany Ghana Greece
## 184 1 1
## Haiti Holland Hungary
## 5 40 3
## Iceland Israel Italy
## 8 1 12
## Jamaica Japan Kenya
## 10 9 1
## Korea Republic Kosovo Mali
## 2 2 1
## Mexico Montenegro Morocco
## 7 1 4
## New Zealand Nigeria North Macedonia
## 5 14 1
## Northern Ireland Norway Panama
## 3 18 2
## Paraguay Poland Portugal
## 1 15 16
## Republic of Ireland Scotland Serbia
## 13 26 5
## Slovakia Slovenia South Africa
## 3 3 3
## Spain St. Kitts and Nevis Sweden
## 182 1 30
## Switzerland Trinidad and Tobago Turkey
## 30 1 1
## United States Venezuela Wales
## 219 6 12
## Zambia
## 2
##
## Counts for Club :
##
## 1. FC Köln 1. FC Nürnberg Ajax Angel City FC
## 19 19 18 22
## Arsenal AS Saint Étienne Aston Villa Athletic Club
## 23 19 17 22
## Atlético de Madrid Bordeaux Brighton Bristol City
## 21 12 16 15
## Chelsea Chicago Red Stars Dijon FCO En Avant Guingamp
## 25 24 12 15
## Everton FC Barcelona FC Bayern München FC Fleury 91
## 16 21 22 16
## FC Rosengård FC Zürich Frankfurt Glasgow City FC
## 16 18 22 19
## Granada CF Havre AC Houston Dash Juventus
## 3 17 24 20
## KC Current Leicester City Levante LP Levante UD
## 24 14 14 20
## Leverkusen Liverpool LOSC Lille Madrid CFF
## 21 20 15 16
## Manchester City Manchester Utd Montpellier MSV Duisburg
## 20 19 16 17
## NC Courage NJ/NY Gotham OL OL Reign
## 22 23 26 22
## Orlando Pride Paris FC Paris SG Portland Thorns
## 22 14 22 23
## Rac. Louisville RB Leipzig Real Betis Real Madrid CF
## 20 21 15 21
## Real Sociedad San Diego Wave SC Freiburg SD Eibar
## 15 21 21 3
## Sevilla FC SGS Essen SL Benfica Slavia Praha
## 15 20 20 15
## Sporting Huelva Spurs Stade de Reims SV Werder Bremen
## 12 20 18 19
## TSG Hoffenheim UDG Tenerife Valencia CF VfL Wolfsburg
## 21 21 17 23
## Villarreal CF Washington Spirit West Ham
## 16 22 17
##
## Counts for Position :
##
## CAM CB CDM CF CM LB LM LW LWB RB RM RW RWB ST
## 90 256 108 3 206 113 53 51 2 117 45 54 4 214
##
## Counts for Att.work.rate :
##
## High Low Medium
## 442 64 810
##
## Counts for Def.work.rate :
##
## High Low Medium
## 217 71 1028
##
## Counts for Preferred.foot :
##
## Left Right
## 208 1108
##
## Counts for Weak.foot :
##
## 1 2 3 4 5
## 2 248 832 201 33
##
## Counts for Skill.moves :
##
## 2 3 4 5
## 440 684 179 13
##
## Counts for Gender :
##
## FALSE
## 1316
# Encode the two work-rate labels as integers (Low = 1, Medium = 2, High = 3).
# Any other label becomes NA. Integer literals are used so every branch has
# the same type as the NA_integer_ fallback.
eafc_clean <- eafc_outfield %>%
  mutate(
    Att.work.rate_int = case_when(
      Att.work.rate == "Low" ~ 1L,
      Att.work.rate == "Medium" ~ 2L,
      Att.work.rate == "High" ~ 3L,
      TRUE ~ NA_integer_
    ),
    # The defensive score must be read from Def.work.rate, not Att.work.rate.
    Def.work.rate_int = case_when(
      Def.work.rate == "Low" ~ 1L,
      Def.work.rate == "Medium" ~ 2L,
      Def.work.rate == "High" ~ 3L,
      TRUE ~ NA_integer_
    )
  )
# Number of outfield players per club, keeping the 20 clubs with the most
# players.
count_club <- eafc_outfield %>%
  group_by(Club) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(20)

# Bar chart of those counts, clubs ordered from most to fewest players.
ggplot(count_club, aes(x = fct_reorder(Club, -count), y = count)) +
  geom_col(aes(fill = count)) +
  scale_fill_gradient(low = "darkgreen", high = "darkblue") +
  labs(title = "Count of Players Per Club - Top 20", x = "Clubs") +
  theme(axis.text.x = element_text(angle = 90))
# Number of outfield players per nation, keeping the 20 nations with the most
# players.
count_nation <- eafc_outfield %>%
  group_by(Nation) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  slice(1:20)

# Bar chart of those counts, nations ordered from most to fewest players.
# Every layer, including labs(), is chained with `+`; without it labs() is
# evaluated on its own and the title and axis label never reach the plot.
count_nation %>%
  ggplot(aes(x = fct_reorder(Nation, -count), y = count)) +
  geom_col(aes(fill = count)) +
  scale_fill_gradient(low = "darkgreen", high = "darkblue") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Count of Players Per Nation - Top 20", x = "Nations")
## $x
## [1] "Clubs"
##
## $title
## [1] "Count of Players Per Nation - Top 20"
##
## attr(,"class")
## [1] "labels"
# Add a coarse positional group to every outfield player:
# ST and CF are "ATT"; the backs and wing-backs are "DEF"; every remaining
# position falls through to "MID".
eafc_outfield <- eafc_outfield %>%
  mutate(
    Position_Type = case_when(
      Position %in% c("ST", "CF") ~ "ATT",
      Position %in% c("RWB", "RB", "CB", "LB", "LWB") ~ "DEF",
      TRUE ~ "MID"
    )
  )
# Player count for each position, most common position first.
count_pos <- eafc_outfield %>%
  group_by(Position) %>%
  tally(name = "count") %>%
  arrange(desc(count))
# Bar chart of the number of players per position within one position group.
#   position_type: value of Position_Type to keep ("ATT", "MID" or "DEF").
#   plot_title:    title shown above the panel.
# Returns a ggplot object. Every panel uses the same 0-300 y-axis so bar
# heights can be compared between panels.
plot_position_counts <- function(position_type, plot_title) {
  eafc_outfield %>%
    filter(Position_Type == position_type) %>%
    ggplot(aes(x = Position, fill = after_stat(count))) +
    geom_bar() +
    ylim(0, 300) +
    scale_fill_gradient(low = "lightgreen", high = "blue") +
    # Print the count above each bar.
    geom_text(
      aes(label = after_stat(count)),
      stat = "count", vjust = -0.5, size = 3
    ) +
    theme_bw() +
    theme(legend.position = "none") +
    labs(title = plot_title)
}

# Stack the three panels in a single column.
gridExtra::grid.arrange(
  plot_position_counts("ATT", "Count of Players Per Position - Attackers"),
  plot_position_counts("MID", "Count of Players Per Position - Midfielders"),
  plot_position_counts("DEF", "Count of Players Per Position - Defenders"),
  ncol = 1
)
Using the describe() function we can generate the summary statistics, which provide insight into each variable. The skew column can be used to identify variables that do not follow a normal distribution, indicated by a skew value greater than 0.5 or less than -0.5. The transformation of the skewed variables will occur in Part 2.
# Keep only the numeric rating columns of "eafc_outfield" by dropping the row
# index, the identifiers and the categorical variables, then use describe()
# to print the summary statistics of what remains.
eafc_outfield_num <- select(
  eafc_outfield,
  -c(
    "X", "Name", "Nation", "Club", "Position",
    "Att.work.rate", "Def.work.rate", "Preferred.foot",
    "Weak.foot", "Skill.moves", "Gender", "Position_Type"
  )
)
describe(eafc_outfield_num, na.rm = TRUE)
## vars n mean sd median trimmed mad min max range skew
## Age 1 1316 25.09 4.35 25.0 24.88 4.45 17 40 23 0.46
## Overall 2 1316 74.16 6.10 74.0 74.28 5.93 52 91 39 -0.23
## Pace 3 1316 69.54 10.06 70.0 69.90 8.90 33 94 61 -0.43
## Shooting 4 1316 60.77 13.78 63.0 61.31 14.83 22 90 68 -0.33
## Passing 5 1316 66.62 8.28 67.0 66.80 7.41 40 91 51 -0.21
## Dribbling 6 1316 69.01 10.57 70.0 69.54 10.38 22 93 71 -0.58
## Defending 7 1316 60.46 16.18 65.0 61.45 17.79 20 91 71 -0.48
## Physicality 8 1316 67.29 8.17 67.0 67.49 7.41 36 89 53 -0.28
## Acceleration 9 1316 69.31 10.66 70.0 69.82 10.38 31 95 64 -0.57
## Sprint 10 1316 69.71 10.29 70.0 70.17 8.90 33 94 61 -0.56
## Positioning 11 1316 63.76 16.22 69.0 65.13 14.83 18 93 75 -0.69
## Finishing 12 1316 60.51 16.06 64.0 61.28 17.79 20 94 74 -0.38
## Shot 13 1316 74.61 6.58 75.0 74.81 5.93 50 94 44 -0.36
## Long 14 1316 65.83 9.75 67.0 66.22 10.38 29 93 64 -0.33
## Volleys 15 1316 52.72 15.61 54.5 52.80 18.53 17 92 75 -0.06
## Penalties 16 1316 57.95 11.21 59.0 57.91 11.86 32 91 59 0.04
## Vision 17 1316 63.45 13.25 66.0 64.38 13.34 22 92 70 -0.61
## Crossing 18 1316 60.89 14.14 64.0 61.69 14.83 22 93 71 -0.48
## Free 19 1316 54.75 12.84 56.0 54.86 14.83 20 89 69 -0.07
## Curve 20 1316 59.90 12.98 62.0 60.51 11.86 20 92 72 -0.42
## Agility 21 1316 67.77 11.26 69.0 68.23 10.38 32 94 62 -0.42
## Balance 22 1316 67.45 11.61 68.0 67.81 11.86 30 94 64 -0.35
## Reactions 23 1316 71.97 7.53 72.0 72.20 7.41 45 92 47 -0.36
## Ball 24 1316 74.24 6.94 74.0 74.30 5.93 49 94 45 -0.11
## Composure 25 1316 68.45 8.48 68.0 68.39 8.90 41 93 52 0.05
## Interceptions 26 1316 59.95 18.55 68.0 61.60 14.83 15 90 75 -0.69
## Heading 27 1316 63.75 11.94 65.0 64.16 13.34 24 94 70 -0.29
## Def 28 1316 58.80 19.19 65.0 59.87 20.76 14 92 78 -0.41
## Standing 29 1316 62.28 17.57 69.0 63.93 14.83 12 92 80 -0.75
## Sliding 30 1316 57.25 17.77 62.0 58.30 19.27 10 90 80 -0.46
## Jumping 31 1316 72.23 10.43 73.0 72.56 10.38 27 95 68 -0.36
## Stamina 32 1316 69.19 10.38 69.0 69.57 10.38 33 94 61 -0.46
## Strength 33 1316 67.81 10.66 68.5 68.22 9.64 32 92 60 -0.43
## Aggression 34 1316 62.24 12.25 63.0 62.81 11.86 22 94 72 -0.40
## kurtosis se
## Age -0.11 0.12
## Overall 0.38 0.17
## Pace 0.56 0.28
## Shooting -0.72 0.38
## Passing -0.13 0.23
## Dribbling 0.60 0.29
## Defending -0.95 0.45
## Physicality 0.22 0.23
## Acceleration 0.70 0.29
## Sprint 0.94 0.28
## Positioning -0.33 0.45
## Finishing -0.81 0.44
## Shot 0.44 0.18
## Long -0.09 0.27
## Volleys -0.88 0.43
## Penalties -0.35 0.31
## Vision -0.06 0.37
## Crossing -0.51 0.39
## Free -0.58 0.35
## Curve -0.07 0.36
## Agility 0.32 0.31
## Balance 0.16 0.32
## Reactions 0.52 0.21
## Ball 0.11 0.19
## Composure -0.16 0.23
## Interceptions -0.75 0.51
## Heading -0.40 0.33
## Def -1.12 0.53
## Standing -0.56 0.48
## Sliding -0.96 0.49
## Jumping 0.03 0.29
## Stamina 0.73 0.29
## Strength 0.46 0.29
## Aggression 0.01 0.34
The numerical data types can be divided into three categories. These are physical, technical and tactical.
# Physical attributes of the outfield players.
eafc_phys <- eafc_outfield_num %>%
  select(c(
    "Age", "Pace", "Physicality", "Acceleration", "Sprint", "Agility",
    "Balance", "Reactions", "Stamina", "Strength", "Aggression"
  ))

# Box plot per attribute. pivot_longer() (the current replacement for the
# superseded gather()) stacks the columns into key/value pairs so that
# facet_wrap() can draw one panel per attribute.
eafc_phys %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_boxplot(outlier.colour = "red", outlier.shape = 1) +
  labs(title = "Spread of Physical Characteristics")

# Histogram per attribute, using the same long format.
eafc_phys %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_histogram() +
  labs(title = "Distribution Physical Characteristics")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Technical attributes of the outfield players.
eafc_tech <- eafc_outfield_num %>%
  select(c(
    "Shooting", "Passing", "Dribbling", "Defending", "Finishing", "Shot",
    "Long", "Volleys", "Penalties", "Crossing", "Free", "Curve", "Ball",
    "Interceptions", "Heading", "Def", "Standing", "Sliding", "Jumping"
  ))

# Box plot per attribute. pivot_longer() (the current replacement for the
# superseded gather()) stacks the columns into key/value pairs so that
# facet_wrap() can draw one panel per attribute.
eafc_tech %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_boxplot(outlier.colour = "red", outlier.shape = 1) +
  labs(title = "Spread of Technical Characteristics")

# Histogram per attribute, using the same long format.
eafc_tech %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_histogram() +
  labs(title = "Distribution Technical Characteristics")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Tactical attributes of the outfield players.
eafc_tact <- eafc_outfield_num %>%
  select(c("Positioning", "Vision", "Composure"))

# Box plot per attribute. pivot_longer() (the current replacement for the
# superseded gather()) stacks the columns into key/value pairs so that
# facet_wrap() can draw one panel per attribute. Outliers are drawn as red
# open circles, matching the physical and technical box plots.
eafc_tact %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_boxplot(outlier.colour = "red", outlier.shape = 1) +
  labs(title = "Spread of Tactical Characteristics")

# Histogram per attribute, using the same long format.
eafc_tact %>%
  keep(is.numeric) %>%
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_histogram() +
  labs(title = "Distribution Tactical Characteristics")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Next we compare the mean Strength, Sprint and Stamina ratings across the position types.
# Mean Strength, Sprint and Stamina for each position type.
# Position_Type is kept in the selection so the grouping uses the column of
# the piped data. Grouping by eafc_outfield$Position_Type only works while the
# row order is untouched, and it names the result column after that
# expression.
SSS_mean <- eafc_outfield %>%
  select(c("Position_Type", "Strength", "Sprint", "Stamina")) %>%
  group_by(Position_Type) %>%
  summarise(
    mean_str = mean(Strength),
    mean_spr = mean(Sprint),
    mean_sta = mean(Stamina)
  )
print(SSS_mean)
## # A tibble: 3 × 4
## `eafc_outfield$Position_Type` mean_str mean_spr mean_sta
## <chr> <dbl> <dbl> <dbl>
## 1 ATT 71.0 74.1 66.0
## 2 DEF 69.3 67.3 68.3
## 3 MID 65.5 70.1 71.0
# Column chart of Overall against one attribute for one position group.
#   position_type: value of Position_Type to keep ("ATT", "MID" or "DEF").
#   attribute:     name of the attribute column for the x-axis, as a string.
#   plot_title:    title shown above the panel.
# Returns a ggplot object. The dotted vertical line marks the mean of the
# attribute within the selected group. It is computed from the data instead
# of being typed in, because one hard-coded value cannot be correct for every
# group and attribute.
plot_overall_by_attribute <- function(position_type, attribute, plot_title) {
  players <- eafc_outfield %>%
    filter(Position_Type == position_type)
  attribute_mean <- mean(players[[attribute]])

  ggplot(players, aes(x = .data[[attribute]], y = Overall, fill = Overall)) +
    geom_col() +
    geom_vline(
      xintercept = attribute_mean,
      linetype = "dotted", color = "black", linewidth = 1
    ) +
    scale_fill_viridis_c() +
    theme_bw() +
    # The x label is set explicitly so the axis shows the attribute name
    # rather than the .data[[...]] expression.
    labs(title = plot_title, x = attribute)
}

# Strength: one panel per position type.
gridExtra::grid.arrange(
  plot_overall_by_attribute(
    "ATT", "Strength", "Attacking Players Overall vs Strength"
  ),
  plot_overall_by_attribute(
    "MID", "Strength", "Midfielders Players Overall vs Strength"
  ),
  plot_overall_by_attribute(
    "DEF", "Strength", "Defenders Players Overall vs Strength"
  ),
  ncol = 1
)

# Sprint: one panel per position type.
gridExtra::grid.arrange(
  plot_overall_by_attribute(
    "ATT", "Sprint", "Attacking Players Overall vs Sprint Ability"
  ),
  plot_overall_by_attribute(
    "MID", "Sprint", "Midfielders Players Overall vs Sprint Ability"
  ),
  plot_overall_by_attribute(
    "DEF", "Sprint", "Defenders Players Overall vs Sprint Ability"
  ),
  ncol = 1
)

# Stamina: one panel per position type.
gridExtra::grid.arrange(
  plot_overall_by_attribute(
    "ATT", "Stamina", "Attacking Players Overall vs Stamina"
  ),
  plot_overall_by_attribute(
    "MID", "Stamina", "Midfielders Players Overall vs Stamina"
  ),
  plot_overall_by_attribute(
    "DEF", "Stamina", "Defenders Players Overall vs Stamina"
  ),
  ncol = 1
)
# Create the new metric called SSS: the sum of the mean Strength, mean Sprint
# and mean Stamina ratings within a group.

# SSS per player name, rounded to two decimals.
SSS_player <- summarise(
  group_by(eafc_outfield, Name),
  SSS = round(mean(Strength) + mean(Sprint) + mean(Stamina), 2)
)

# SSS per club, unrounded.
SSS_club <- summarise(
  group_by(eafc_outfield, Club),
  SSS = mean(Strength) + mean(Sprint) + mean(Stamina)
)
# Bar chart of the 15 clubs with the highest SSS. The title states 15 because
# slice(1:15) keeps fifteen rows.
SSS_club %>%
  arrange(desc(SSS)) %>%
  slice(1:15) %>%
  ggplot(aes(x = fct_reorder(Club, SSS), y = SSS, fill = SSS)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90)) +
  # Print the rounded SSS above each bar.
  geom_text(aes(label = round(SSS, 2)), vjust = -0.5, size = 2) +
  scale_fill_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Top 15 Clubs with the highest SSS", x = "Club")

# Bar chart of the 15 players with the highest SSS.
SSS_player %>%
  arrange(desc(SSS)) %>%
  slice(1:15) %>%
  ggplot(aes(x = fct_reorder(Name, SSS), y = SSS, fill = SSS)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90)) +
  geom_text(aes(label = round(SSS, 2)), vjust = -0.5, size = 2) +
  scale_fill_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Top 15 Players with the highest SSS", x = "Player")
# SSS_player is not rebuilt here: it was already computed from the same data
# when the SSS metric was introduced, so a second identical computation adds
# nothing.

# Interactive scatter plot of Overall against each player's SSS score
# (Strength + Sprint + Stamina) with a fitted linear trend line.
# Colour is mapped in the point layer only. When the continuous Overall
# colour is mapped for the whole plot, stat_smooth() inherits it and drops it
# with a warning.
SSS_Overall <- eafc_outfield %>%
  mutate(
    SSS = round(Strength + Sprint + Stamina, 2)
  ) %>%
  ggplot(aes(x = SSS, y = Overall)) +
  geom_point(aes(color = Overall)) +
  scale_color_viridis_c(option = "turbo", direction = -1) +
  theme_bw() +
  stat_smooth(method = "lm") +
  labs(title = "Overall Rating vs SSS score")
ggplotly(SSS_Overall)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# Interactive scatter plot of the 15 players with the highest Free rating:
# Free on the x-axis, Penalties on the y-axis, coloured by Overall.
# The label aesthetic carries the player name for the plotly tooltip.
set_piece <- eafc_outfield %>%
  arrange(desc(Free)) %>%
  slice(1:15) %>%
  ggplot(aes(x = Free, y = Penalties, color = Overall, label = Name)) +
  # A constant point size is set outside aes(); inside aes() it is treated as
  # a data mapping and adds a meaningless size legend.
  geom_point(size = 5) +
  scale_color_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Set Piece Specialists", x = "Free Kick")
ggplotly(set_piece)
# Interactive scatter plot of the 15 players with the highest Shooting
# rating: Finishing on the x-axis, Shooting on the y-axis, coloured by
# Overall. The label aesthetic carries the player name for the plotly tooltip.
scoring <- eafc_outfield %>%
  arrange(desc(Shooting)) %>%
  slice(1:15) %>%
  ggplot(aes(x = Finishing, y = Shooting, color = Overall, label = Name)) +
  # A constant point size is set outside aes(); inside aes() it is treated as
  # a data mapping and adds a meaningless size legend.
  geom_point(size = 5) +
  scale_color_viridis_c(option = "viridis", direction = -1) +
  # The x-axis shows Finishing, so it is labelled as such.
  labs(title = "Finishers", x = "Finishing")
ggplotly(scoring)
# Bar chart of the ten outfield players with the highest Overall rating,
# bars ordered by rating and labelled with the rating value.
eafc_outfield %>%
  arrange(desc(Overall)) %>%
  head(10) %>%
  ggplot(aes(x = fct_reorder(Name, Overall), y = Overall, fill = Overall)) +
  geom_col() +
  geom_text(aes(label = round(Overall, 2)), vjust = -0.5, size = 3) +
  scale_fill_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Top 10 Players - Overall Rating", x = "Player") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, size = 8))
# Bar chart of the five highest-rated players within one position group.
#   position_type: value of Position_Type to keep ("ATT", "MID" or "DEF").
#   plot_title:    title shown above the chart.
# Returns a ggplot object with bars ordered by Overall and labelled with the
# rating value.
plot_top_five <- function(position_type, plot_title) {
  eafc_outfield %>%
    filter(Position_Type == position_type) %>%
    arrange(desc(Overall)) %>%
    slice(1:5) %>%
    ggplot(aes(x = fct_reorder(Name, Overall), y = Overall, fill = Overall)) +
    geom_col() +
    theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) +
    geom_text(aes(label = round(Overall, 2)), vjust = -0.5, size = 3) +
    scale_fill_viridis_c(option = "viridis", direction = -1) +
    labs(title = plot_title, x = "Player")
}

# Each call is a top-level expression, so each chart is printed in turn.
plot_top_five("ATT", "Top 5 Attackers - Overall Rating")
plot_top_five("MID", "Top 5 Midfielders - Overall Rating")
plot_top_five("DEF", "Top 5 Defenders - Overall Rating")
# Bar chart of the ten clubs with the highest mean Overall rating among
# their outfield players, bars ordered by that mean and labelled with it.
eafc_outfield %>%
  group_by(Club) %>%
  summarise(Overall = mean(Overall)) %>%
  arrange(desc(Overall)) %>%
  head(10) %>%
  ggplot(aes(x = fct_reorder(Club, Overall), y = Overall, fill = Overall)) +
  geom_col() +
  geom_text(aes(label = round(Overall, 2)), vjust = -0.5, size = 3) +
  scale_fill_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Top 10 Clubs - Average Overall Rating", x = "Club") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, size = 8))
# Correlation plots: Overall together with each attribute group, one
# ggcorr() chart per group with the coefficients printed in the cells.

# Overall and the physical attributes.
phys_corr <- select(
  eafc_outfield_num,
  c(
    "Overall", "Age", "Pace", "Physicality", "Acceleration", "Sprint",
    "Agility", "Balance", "Reactions", "Stamina", "Strength", "Aggression"
  )
)
ggcorr(
  phys_corr,
  nbreaks = 4,
  palette = "RdGy",
  hjust = 0.75,
  label = TRUE,
  label_size = 3,
  label_color = "white",
  size = 2
)

# Overall and the technical attributes.
tech_corr <- select(
  eafc_outfield_num,
  c(
    "Overall", "Shooting", "Passing", "Dribbling", "Defending", "Finishing",
    "Shot", "Long", "Volleys", "Penalties", "Crossing", "Free", "Curve",
    "Ball", "Interceptions", "Heading", "Def", "Standing", "Sliding",
    "Jumping"
  )
)
ggcorr(
  tech_corr,
  nbreaks = 4,
  palette = "RdGy",
  hjust = 0.75,
  label = TRUE,
  label_size = 3,
  label_color = "white",
  size = 2
)

# Overall and the tactical attributes.
tact_corr <- select(
  eafc_outfield_num,
  c("Overall", "Positioning", "Vision", "Composure")
)
ggcorr(
  tact_corr,
  nbreaks = 4,
  palette = "RdGy",
  hjust = 0.75,
  label = TRUE,
  label_size = 3,
  label_color = "white",
  size = 2
)