EAFC 24 - Part 1: Exploratory Data Analysis

This is a two-part series analyzing the recently released player data from EAFC 24. The first part undertakes an exploratory data analysis of the female-only data. The second part will focus on building a linear regression model to predict player rating.

Installing and loading the necessary libraries

To perform the EDA we need to install a range of R packages to help manipulate and visualise the data. If you have not already installed the required packages, delete the # to install them.

Load the data.

To make importing the data as seamless as possible, it is recommended to: 1. Create a new directory. 2. Create a “Data” folder in the new directory. 3. Save the data in the “Data” folder. 4. Follow the steps below.

The data can be found on Kaggle through this link: https://www.kaggle.com/datasets/nyagami/fc-24-players-database-and-stats-from-easports/code

# Load the data and name it "eafc". na.strings lists the raw field values that
# are read as NA: empty fields ("") as well as the literal text "NA".
eafc <- read.csv("Data/female_players.csv", na.strings = c("", "NA"))

Generate data set summary

# Preview the top of the table; n = 6 is head()'s default row count.
head(eafc, n = 6)
##   X                   Name    Nation         Club Position Age Overall Pace
## 1 0        Alexia Putellas     Spain FC Barcelona       CM  29      91   82
## 2 1         Aitana Bonmatí     Spain FC Barcelona       CM  25      90   81
## 3 2               Sam Kerr Australia      Chelsea       ST  30      90   85
## 4 3 Caroline Graham Hansen    Norway FC Barcelona       RW  28      90   89
## 5 4       Kadidiatou Diani    France           OL       RW  28      89   89
## 6 5              Mapi León     Spain FC Barcelona       CB  28      89   75
##   Shooting Passing Dribbling Defending Physicality Acceleration Sprint
## 1       90      91        92        72          78           81     82
## 2       84      83        91        75          73           82     80
## 3       88      74        90        42          85           86     84
## 4       86      88        93        47          75           90     88
## 5       85      83        88        56          77           88     90
## 6       74      81        68        90          81           73     77
##   Positioning Finishing Shot Long Volleys Penalties Vision Crossing Free Curve
## 1          91        91   94   91      90        91     91       88   89    89
## 2          91        91   91   87      62        70     90       67   67    76
## 3          92        92   78   67      92        71     79       70   68    76
## 4          87        86   89   84      83        71     89       90   77    90
## 5          91        90   87   74      74        71     85       84   70    74
## 6          70        65   88   90      49        64     56       86   88    87
##   Agility Balance Reactions Ball Composure Interceptions Heading Def Standing
## 1      90      89        92   94        92            78      74  60       81
## 2      93      86        91   91        85            88      54  70       81
## 3      90      82        89   91        91            24      93  44       39
## 4      94      75        83   90        84            35      71  45       50
## 5      89      84        87   90        84            55      81  46       59
## 6      70      86        90   82        91            90      83  91       91
##   Sliding Jumping Stamina Strength Aggression Att.work.rate Def.work.rate
## 1      64      84      85       78         70          High        Medium
## 2      67      75      80       75         60          High          High
## 3      30      95      87       88         70          High          High
## 4      46      84      84       74         66          High        Medium
## 5      49      86      81       84         52          High          High
## 6      90      87      75       83         84          High          High
##   Preferred.foot Weak.foot Skill.moves
## 1           Left         5           5
## 2          Right         5           4
## 3          Right         4           4
## 4          Right         4           5
## 5          Right         4           4
## 6           Left         3           2
##                                                                                          URL
## 1        https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/alexia-putellas/227203
## 2         https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/aitana-bonmati/241667
## 3               https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/sam-kerr/227125
## 4 https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/caroline-graham-hansen/227102
## 5       https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/kadidiatou-diani/227361
## 6              https://www.ea.com/games/ea-sports-fc/ratings/player-ratings/mapi-leon/236479
##   Gender GK
## 1  FALSE NA
## 2  FALSE NA
## 3  FALSE NA
## 4  FALSE NA
## 5  FALSE NA
## 6  FALSE NA

Examine the data for missing values

# Flag every cell that is NA, then total the flags column by column to get
# the number of missing values per variable.
eafc %>%
  is.na() %>%
  colSums()
##              X           Name         Nation           Club       Position 
##              0              0              0              0              0 
##            Age        Overall           Pace       Shooting        Passing 
##              0              0              0              0              0 
##      Dribbling      Defending    Physicality   Acceleration         Sprint 
##              0              0              0              0              0 
##    Positioning      Finishing           Shot           Long        Volleys 
##              0              0              0              0              0 
##      Penalties         Vision       Crossing           Free          Curve 
##              0              0              0              0              0 
##        Agility        Balance      Reactions           Ball      Composure 
##              0              0              0              0              0 
##  Interceptions        Heading            Def       Standing        Sliding 
##              0              0              0              0              0 
##        Jumping        Stamina       Strength     Aggression  Att.work.rate 
##              0              0              0              0              0 
##  Def.work.rate Preferred.foot      Weak.foot    Skill.moves            URL 
##              0              0              0              0              0 
##         Gender             GK 
##              0           1316

The missing data relates to whether the player is a GK or not: only goalkeepers have a rating in this column. There are no other missing values.

# Build the outfield-player table: discard rows whose Position is "GK",
# then drop the GK and URL columns.
eafc_outfield <- eafc %>%
  filter(Position != "GK") %>%
  select(-GK, -URL)

Examine the data types

# Print the structure of the outfield table: dimensions, plus each column's
# type and first few values.
str(eafc_outfield)
## 'data.frame':    1316 obs. of  45 variables:
##  $ X             : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Name          : chr  "Alexia Putellas" "Aitana Bonmatí" "Sam Kerr" "Caroline Graham Hansen" ...
##  $ Nation        : chr  "Spain" "Spain" "Australia" "Norway" ...
##  $ Club          : chr  "FC Barcelona" "FC Barcelona" "Chelsea" "FC Barcelona" ...
##  $ Position      : chr  "CM" "CM" "ST" "RW" ...
##  $ Age           : int  29 25 30 28 28 28 34 28 32 33 ...
##  $ Overall       : int  91 90 90 90 89 89 89 89 88 88 ...
##  $ Pace          : int  82 81 85 89 89 75 83 75 69 53 ...
##  $ Shooting      : int  90 84 88 86 85 74 89 89 51 67 ...
##  $ Passing       : int  91 83 74 88 83 81 80 77 67 69 ...
##  $ Dribbling     : int  92 91 90 93 88 68 89 90 57 69 ...
##  $ Defending     : int  72 75 42 47 56 90 47 39 89 91 ...
##  $ Physicality   : int  78 73 85 75 77 81 77 76 80 84 ...
##  $ Acceleration  : int  81 82 86 90 88 73 82 73 65 53 ...
##  $ Sprint        : int  82 80 84 88 90 77 84 77 72 53 ...
##  $ Positioning   : int  91 91 92 87 91 70 90 93 39 70 ...
##  $ Finishing     : int  91 91 92 86 90 65 94 91 51 65 ...
##  $ Shot          : int  94 91 78 89 87 88 89 83 85 80 ...
##  $ Long          : int  91 87 67 84 74 90 82 72 68 84 ...
##  $ Volleys       : int  90 62 92 83 74 49 83 90 60 55 ...
##  $ Penalties     : int  91 70 71 71 71 64 88 84 51 67 ...
##  $ Vision        : int  91 90 79 89 85 56 75 76 51 59 ...
##  $ Crossing      : int  88 67 70 90 84 86 71 71 59 53 ...
##  $ Free          : int  89 67 68 77 70 88 73 75 40 58 ...
##  $ Curve         : int  89 76 76 90 74 87 68 79 69 65 ...
##  $ Agility       : int  90 93 90 94 89 70 83 74 66 53 ...
##  $ Balance       : int  89 86 82 75 84 86 67 48 65 34 ...
##  $ Reactions     : int  92 91 89 83 87 90 82 90 91 87 ...
##  $ Ball          : int  94 91 91 90 90 82 92 90 79 75 ...
##  $ Composure     : int  92 85 91 84 84 91 91 86 75 84 ...
##  $ Interceptions : int  78 88 24 35 55 90 46 23 89 89 ...
##  $ Heading       : int  74 54 93 71 81 83 84 92 85 94 ...
##  $ Def           : int  60 70 44 45 46 91 41 42 92 92 ...
##  $ Standing      : int  81 81 39 50 59 91 43 33 87 92 ...
##  $ Sliding       : int  64 67 30 46 49 90 39 30 88 81 ...
##  $ Jumping       : int  84 75 95 84 86 87 90 89 79 81 ...
##  $ Stamina       : int  85 80 87 84 81 75 87 75 72 72 ...
##  $ Strength      : int  78 75 88 74 84 83 81 80 82 91 ...
##  $ Aggression    : int  70 60 70 66 52 84 53 65 87 84 ...
##  $ Att.work.rate : chr  "High" "High" "High" "High" ...
##  $ Def.work.rate : chr  "Medium" "High" "High" "Medium" ...
##  $ Preferred.foot: chr  "Left" "Right" "Right" "Right" ...
##  $ Weak.foot     : int  5 5 4 4 4 3 5 4 3 3 ...
##  $ Skill.moves   : int  5 4 4 5 4 2 4 4 2 2 ...
##  $ Gender        : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
# Tally how many columns of "eafc_outfield" there are of each data type.
# vapply() guarantees one character value per column (the first class of the
# column), so the result is always a named character vector.
datatype_count <- vapply(
  eafc_outfield,
  function(column) class(column)[1],
  character(1)
)
print(table(datatype_count))
## datatype_count
## character   integer   logical 
##         7        37         1

Categorical variable frequency

# Keep the categorical columns (character labels, the two small ordinal
# ratings and the Gender flag) and build a frequency table for each one.
cat_counts <- eafc_outfield %>%
  select(c("Nation", "Club", "Position", "Att.work.rate", "Def.work.rate", "Preferred.foot", "Weak.foot", "Skill.moves", "Gender")) %>%
  lapply(table)

# Print every frequency table under a heading with its column name.
# seq_along() is used instead of 1:length() so an empty list yields no
# iterations rather than the indices 1 and 0.
for (i in seq_along(cat_counts)) {
  cat("Counts for", names(cat_counts)[i], ":\n")
  print(cat_counts[[i]])
  cat("\n")
}
## Counts for Nation :
## 
##                Algeria                Andorra              Argentina 
##                      3                      1                      5 
##              Australia                Austria                Belarus 
##                     13                     18                      1 
##                Belgium                Bermuda Bosnia and Herzegovina 
##                      7                      1                      1 
##                 Brazil               Cameroon                 Canada 
##                     25                      6                     26 
##                  Chile               China PR               Colombia 
##                      4                      1                      7 
##             Costa Rica          Côte d'Ivoire                Curaçao 
##                      1                      4                      1 
##         Czech Republic                Denmark                Ecuador 
##                     13                     22                      1 
##                England                Finland                 France 
##                     94                      6                    151 
##                Germany                  Ghana                 Greece 
##                    184                      1                      1 
##                  Haiti                Holland                Hungary 
##                      5                     40                      3 
##                Iceland                 Israel                  Italy 
##                      8                      1                     12 
##                Jamaica                  Japan                  Kenya 
##                     10                      9                      1 
##         Korea Republic                 Kosovo                   Mali 
##                      2                      2                      1 
##                 Mexico             Montenegro                Morocco 
##                      7                      1                      4 
##            New Zealand                Nigeria        North Macedonia 
##                      5                     14                      1 
##       Northern Ireland                 Norway                 Panama 
##                      3                     18                      2 
##               Paraguay                 Poland               Portugal 
##                      1                     15                     16 
##    Republic of Ireland               Scotland                 Serbia 
##                     13                     26                      5 
##               Slovakia               Slovenia           South Africa 
##                      3                      3                      3 
##                  Spain    St. Kitts and Nevis                 Sweden 
##                    182                      1                     30 
##            Switzerland    Trinidad and Tobago                 Turkey 
##                     30                      1                      1 
##          United States              Venezuela                  Wales 
##                    219                      6                     12 
##                 Zambia 
##                      2 
## 
## Counts for Club :
## 
##         1. FC Köln     1. FC Nürnberg               Ajax      Angel City FC 
##                 19                 19                 18                 22 
##            Arsenal   AS Saint Étienne        Aston Villa      Athletic Club 
##                 23                 19                 17                 22 
## Atlético de Madrid           Bordeaux           Brighton       Bristol City 
##                 21                 12                 16                 15 
##            Chelsea  Chicago Red Stars          Dijon FCO  En Avant Guingamp 
##                 25                 24                 12                 15 
##            Everton       FC Barcelona  FC Bayern München       FC Fleury 91 
##                 16                 21                 22                 16 
##       FC Rosengård          FC Zürich          Frankfurt    Glasgow City FC 
##                 16                 18                 22                 19 
##         Granada CF           Havre AC       Houston Dash           Juventus 
##                  3                 17                 24                 20 
##         KC Current     Leicester City         Levante LP         Levante UD 
##                 24                 14                 14                 20 
##         Leverkusen          Liverpool         LOSC Lille         Madrid CFF 
##                 21                 20                 15                 16 
##    Manchester City     Manchester Utd        Montpellier       MSV Duisburg 
##                 20                 19                 16                 17 
##         NC Courage       NJ/NY Gotham                 OL           OL Reign 
##                 22                 23                 26                 22 
##      Orlando Pride           Paris FC           Paris SG    Portland Thorns 
##                 22                 14                 22                 23 
##    Rac. Louisville         RB Leipzig         Real Betis     Real Madrid CF 
##                 20                 21                 15                 21 
##      Real Sociedad     San Diego Wave        SC Freiburg           SD Eibar 
##                 15                 21                 21                  3 
##         Sevilla FC          SGS Essen         SL Benfica       Slavia Praha 
##                 15                 20                 20                 15 
##    Sporting Huelva              Spurs     Stade de Reims   SV Werder Bremen 
##                 12                 20                 18                 19 
##     TSG Hoffenheim       UDG Tenerife        Valencia CF      VfL Wolfsburg 
##                 21                 21                 17                 23 
##      Villarreal CF  Washington Spirit           West Ham 
##                 16                 22                 17 
## 
## Counts for Position :
## 
## CAM  CB CDM  CF  CM  LB  LM  LW LWB  RB  RM  RW RWB  ST 
##  90 256 108   3 206 113  53  51   2 117  45  54   4 214 
## 
## Counts for Att.work.rate :
## 
##   High    Low Medium 
##    442     64    810 
## 
## Counts for Def.work.rate :
## 
##   High    Low Medium 
##    217     71   1028 
## 
## Counts for Preferred.foot :
## 
##  Left Right 
##   208  1108 
## 
## Counts for Weak.foot :
## 
##   1   2   3   4   5 
##   2 248 832 201  33 
## 
## Counts for Skill.moves :
## 
##   2   3   4   5 
## 440 684 179  13 
## 
## Counts for Gender :
## 
## FALSE 
##  1316

Integer-Encoding

# Encode the ordinal work-rate labels as integers (Low = 1, Medium = 2,
# High = 3). Each *_int column is derived from its own source column, and any
# value other than the three known labels becomes NA. Integer literals (1L)
# keep every branch of case_when() the same type as NA_integer_.
eafc_clean <- eafc_outfield %>%
  mutate(
    Att.work.rate_int = case_when(
      Att.work.rate == "Low" ~ 1L,
      Att.work.rate == "Medium" ~ 2L,
      Att.work.rate == "High" ~ 3L,
      TRUE ~ NA_integer_
    ),
    Def.work.rate_int = case_when(
      Def.work.rate == "Low" ~ 1L,
      Def.work.rate == "Medium" ~ 2L,
      Def.work.rate == "High" ~ 3L,
      TRUE ~ NA_integer_
    )
  )

Top 20 Clubs

# Number of outfield players per club, largest first, limited to 20 clubs.
count_club <- eafc_outfield %>%
  group_by(Club) %>%
  summarise(count = n()) %>%
  arrange(-count) %>%
  head(20)

# Bar chart of those 20 clubs, bars ordered from most to fewest players and
# shaded by the player count.
ggplot(count_club, aes(x = fct_reorder(Club, -count), y = count)) +
  geom_col(aes(fill = count)) +
  scale_fill_gradient(low = "darkgreen", high = "darkblue") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Count of Players Per Club - Top 20", x = "Clubs")

Top 20 Nations

# Number of outfield players per nation, largest first, limited to 20 nations.
count_nation <- eafc_outfield %>%
  group_by(Nation) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  slice(1:20)

# Bar chart of those 20 nations, bars ordered from most to fewest players.
# labs() must be chained with `+`; as a separate statement it is not applied
# to the plot and its value is printed to the console instead.
count_nation %>%
  ggplot(aes(x = fct_reorder(Nation, -count), y = count)) +
  geom_col(aes(fill = count)) +
  scale_fill_gradient(low = "darkgreen", high = "darkblue") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Count of Players Per Nation - Top 20", x = "Nations")

Breakdown of Position

# Collapse the detailed positions into three groups: strikers and centre
# forwards are "ATT", full/wing/centre backs are "DEF", and every other
# position is "MID".
eafc_outfield <- eafc_outfield %>%
  mutate(
    Position_Type = if_else(
      Position %in% c("ST", "CF"),
      "ATT",
      if_else(Position %in% c("RWB", "RB", "CB", "LB", "LWB"), "DEF", "MID")
    )
  )

# Number of outfield players in each position, most common position first.
count_pos <- eafc_outfield %>%
  group_by(Position) %>%
  tally(name = "count") %>%
  arrange(-count)

# One bar chart of player counts per position for each position group, stacked
# vertically in the order attackers, midfielders, defenders. The three panels
# differ only in the rows they use and in their title, so they are built from
# a single template. All panels share the same 0-300 y-axis so bar heights
# are comparable between groups.
position_titles <- c(
  ATT = "Count of Players Per Position - Attackers",
  MID = "Count of Players Per Position - Midfielders",
  DEF = "Count of Players Per Position - Defenders"
)

position_panels <- lapply(names(position_titles), function(pos_type) {
  eafc_outfield %>%
    filter(Position_Type == pos_type) %>%
    ggplot(aes(x = Position, fill = after_stat(count))) +
    geom_bar() +
    ylim(0, 300) +
    scale_fill_gradient(low = "lightgreen", high = "blue") +
    # print each bar's count just above the bar
    geom_text(aes(label = after_stat(count)), stat = "count", vjust = -0.5, size = 3) +
    theme_bw() +
    theme(legend.position = "none") +
    labs(title = position_titles[[pos_type]])
})

gridExtra::grid.arrange(grobs = position_panels, ncol = 1)

Numerical Data Distribution

Using the describe() function we can generate the summary statistics, to provide insight into each variable. The skew column can be used to identify variables that do not follow a normal distribution, i.e. those with a skew value greater than 0.5 or less than -0.5. The manipulation of the skewed variables will occur in Part 2.

# Drop the identifier, label and categorical columns so that only the
# numeric rating columns (plus Age) remain.
eafc_outfield_num <- select(
  eafc_outfield,
  -X, -Name, -Nation, -Club, -Position,
  -Att.work.rate, -Def.work.rate, -Preferred.foot,
  -Weak.foot, -Skill.moves, -Gender, -Position_Type
)

# Summary statistics for every remaining column of "eafc_outfield_num".
describe(eafc_outfield_num, na.rm = TRUE)
##               vars    n  mean    sd median trimmed   mad min max range  skew
## Age              1 1316 25.09  4.35   25.0   24.88  4.45  17  40    23  0.46
## Overall          2 1316 74.16  6.10   74.0   74.28  5.93  52  91    39 -0.23
## Pace             3 1316 69.54 10.06   70.0   69.90  8.90  33  94    61 -0.43
## Shooting         4 1316 60.77 13.78   63.0   61.31 14.83  22  90    68 -0.33
## Passing          5 1316 66.62  8.28   67.0   66.80  7.41  40  91    51 -0.21
## Dribbling        6 1316 69.01 10.57   70.0   69.54 10.38  22  93    71 -0.58
## Defending        7 1316 60.46 16.18   65.0   61.45 17.79  20  91    71 -0.48
## Physicality      8 1316 67.29  8.17   67.0   67.49  7.41  36  89    53 -0.28
## Acceleration     9 1316 69.31 10.66   70.0   69.82 10.38  31  95    64 -0.57
## Sprint          10 1316 69.71 10.29   70.0   70.17  8.90  33  94    61 -0.56
## Positioning     11 1316 63.76 16.22   69.0   65.13 14.83  18  93    75 -0.69
## Finishing       12 1316 60.51 16.06   64.0   61.28 17.79  20  94    74 -0.38
## Shot            13 1316 74.61  6.58   75.0   74.81  5.93  50  94    44 -0.36
## Long            14 1316 65.83  9.75   67.0   66.22 10.38  29  93    64 -0.33
## Volleys         15 1316 52.72 15.61   54.5   52.80 18.53  17  92    75 -0.06
## Penalties       16 1316 57.95 11.21   59.0   57.91 11.86  32  91    59  0.04
## Vision          17 1316 63.45 13.25   66.0   64.38 13.34  22  92    70 -0.61
## Crossing        18 1316 60.89 14.14   64.0   61.69 14.83  22  93    71 -0.48
## Free            19 1316 54.75 12.84   56.0   54.86 14.83  20  89    69 -0.07
## Curve           20 1316 59.90 12.98   62.0   60.51 11.86  20  92    72 -0.42
## Agility         21 1316 67.77 11.26   69.0   68.23 10.38  32  94    62 -0.42
## Balance         22 1316 67.45 11.61   68.0   67.81 11.86  30  94    64 -0.35
## Reactions       23 1316 71.97  7.53   72.0   72.20  7.41  45  92    47 -0.36
## Ball            24 1316 74.24  6.94   74.0   74.30  5.93  49  94    45 -0.11
## Composure       25 1316 68.45  8.48   68.0   68.39  8.90  41  93    52  0.05
## Interceptions   26 1316 59.95 18.55   68.0   61.60 14.83  15  90    75 -0.69
## Heading         27 1316 63.75 11.94   65.0   64.16 13.34  24  94    70 -0.29
## Def             28 1316 58.80 19.19   65.0   59.87 20.76  14  92    78 -0.41
## Standing        29 1316 62.28 17.57   69.0   63.93 14.83  12  92    80 -0.75
## Sliding         30 1316 57.25 17.77   62.0   58.30 19.27  10  90    80 -0.46
## Jumping         31 1316 72.23 10.43   73.0   72.56 10.38  27  95    68 -0.36
## Stamina         32 1316 69.19 10.38   69.0   69.57 10.38  33  94    61 -0.46
## Strength        33 1316 67.81 10.66   68.5   68.22  9.64  32  92    60 -0.43
## Aggression      34 1316 62.24 12.25   63.0   62.81 11.86  22  94    72 -0.40
##               kurtosis   se
## Age              -0.11 0.12
## Overall           0.38 0.17
## Pace              0.56 0.28
## Shooting         -0.72 0.38
## Passing          -0.13 0.23
## Dribbling         0.60 0.29
## Defending        -0.95 0.45
## Physicality       0.22 0.23
## Acceleration      0.70 0.29
## Sprint            0.94 0.28
## Positioning      -0.33 0.45
## Finishing        -0.81 0.44
## Shot              0.44 0.18
## Long             -0.09 0.27
## Volleys          -0.88 0.43
## Penalties        -0.35 0.31
## Vision           -0.06 0.37
## Crossing         -0.51 0.39
## Free             -0.58 0.35
## Curve            -0.07 0.36
## Agility           0.32 0.31
## Balance           0.16 0.32
## Reactions         0.52 0.21
## Ball              0.11 0.19
## Composure        -0.16 0.23
## Interceptions    -0.75 0.51
## Heading          -0.40 0.33
## Def              -1.12 0.53
## Standing         -0.56 0.48
## Sliding          -0.96 0.49
## Jumping           0.03 0.29
## Stamina           0.73 0.29
## Strength          0.46 0.29
## Aggression        0.01 0.34

Divide the Numerical Data into Categories

The numerical data types can be divided into three categories. These are physical, technical and tactical.

Physical Numerical Data

Boxplots

# Columns treated as physical characteristics.
eafc_phys <- eafc_outfield_num %>%
  select(c("Age", "Pace", "Physicality", "Acceleration", "Sprint", "Agility", "Balance", "Reactions", "Stamina", "Strength", "Aggression"))

# One boxplot per physical variable. The table is reshaped to long form
# (key = variable name, value = rating) with pivot_longer(), which replaces
# the superseded gather(); outliers are drawn as red open circles.
eafc_phys %>%
  keep(is.numeric) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_boxplot(outlier.colour = "red", outlier.shape = 1) +
  labs(title = "Spread of Physical Characteristics")

Histograms

# One histogram per physical variable. The table is reshaped to long form
# (key = variable name, value = rating) with pivot_longer(), which replaces
# the superseded gather(). bins = 30 is ggplot2's default bin count; stating
# it explicitly keeps the same plot without the "Pick better value" message.
eafc_phys %>%
  keep(is.numeric) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_histogram(bins = 30) +
  labs(title = "Distribution Physical Characteristics")

Technical Numerical Data

Boxplots

# Columns treated as technical characteristics.
eafc_tech <- eafc_outfield_num %>%
  select(c("Shooting", "Passing", "Dribbling", "Defending", "Finishing", "Shot", "Long", "Volleys", "Penalties", "Crossing", "Free", "Curve", "Ball", "Interceptions", "Heading", "Def", "Standing", "Sliding", "Jumping"))

# One boxplot per technical variable. The table is reshaped to long form
# (key = variable name, value = rating) with pivot_longer(), which replaces
# the superseded gather(); outliers are drawn as red open circles.
eafc_tech %>%
  keep(is.numeric) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_boxplot(outlier.colour = "red", outlier.shape = 1) +
  labs(title = "Spread of Technical Characteristics")

Histograms

# One histogram per technical variable. The table is reshaped to long form
# (key = variable name, value = rating) with pivot_longer(), which replaces
# the superseded gather(). bins = 30 is ggplot2's default bin count; stating
# it explicitly keeps the same plot without the "Pick better value" message.
eafc_tech %>%
  keep(is.numeric) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_histogram(bins = 30) +
  labs(title = "Distribution Technical Characteristics")

Tactical Numerical Data

Boxplots

# Columns treated as tactical characteristics.
eafc_tact <- eafc_outfield_num %>%
  select(c("Positioning", "Vision", "Composure"))

# One boxplot per tactical variable. The table is reshaped to long form
# (key = variable name, value = rating) with pivot_longer(), which replaces
# the superseded gather(). Outliers are drawn as red open circles, matching
# the physical and technical boxplots.
eafc_tact %>%
  keep(is.numeric) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_boxplot(outlier.colour = "red", outlier.shape = 1) +
  labs(title = "Spread of Tactical Characteristics")

Histograms

# One histogram per tactical variable. The table is reshaped to long form
# (key = variable name, value = rating) with pivot_longer(), which replaces
# the superseded gather(). bins = 30 is ggplot2's default bin count; stating
# it explicitly keeps the same plot without the "Pick better value" message.
eafc_tact %>%
  keep(is.numeric) %>%
  pivot_longer(cols = everything(), names_to = "key", values_to = "value") %>%
  ggplot(aes(value)) +
  facet_wrap(~key, scales = "free") +
  geom_histogram(bins = 30) +
  labs(title = "Distribution Tactical Characteristics")

Physical Ability Visualization

Strength, Sprint and Stamina for position types.

# Mean Strength, Sprint and Stamina for each position group. Grouping by the
# Position_Type column itself (rather than by an external
# `eafc_outfield$Position_Type` vector) gives the grouping column a clean name.
SSS_mean <- eafc_outfield %>%
  group_by(Position_Type) %>%
  summarise(
    mean_str = mean(Strength),
    mean_spr = mean(Sprint),
    mean_sta = mean(Stamina)
  )

print(SSS_mean)
## # A tibble: 3 × 4
##   Position_Type mean_str mean_spr mean_sta
##   <chr>            <dbl>    <dbl>    <dbl>
## 1 ATT               71.0     74.1     66.0
## 2 DEF               69.3     67.3     68.3
## 3 MID               65.5     70.1     71.0

Strength and Overall Value based on Position Type

# Overall rating against Strength, one panel per position group, stacked in
# the order attackers, midfielders, defenders. Each panel is titled for its
# own group, and its dotted vertical line marks that group's mean Strength,
# computed from the same rows that are plotted.
strength_titles <- c(
  ATT = "Attacking Players Overall vs Strength",
  MID = "Midfielders Players Overall vs Strength",
  DEF = "Defenders Players Overall vs Strength"
)

strength_panels <- lapply(names(strength_titles), function(pos_type) {
  group_players <- eafc_outfield %>%
    filter(Position_Type == pos_type)

  ggplot(group_players, aes(x = Strength, y = Overall, fill = Overall)) +
    geom_col() +
    geom_vline(
      xintercept = mean(group_players$Strength),
      linetype = "dotted", color = "black", linewidth = 1
    ) +
    scale_fill_viridis_c() +
    theme_bw() +
    labs(title = strength_titles[[pos_type]])
})

gridExtra::grid.arrange(grobs = strength_panels, ncol = 1)

Sprinting Ability and Overall Value based on Position Type

# Overall rating against Sprint, one panel per position group, stacked in the
# order attackers, midfielders, defenders. The dotted vertical line in each
# panel marks that group's mean Sprint, computed from the same rows that are
# plotted (rather than a fixed constant shared by every panel).
sprint_titles <- c(
  ATT = "Attacking Players Overall vs Sprint Ability",
  MID = "Midfielders Players Overall vs Sprint Ability",
  DEF = "Defenders Players Overall vs Sprint Ability"
)

sprint_panels <- lapply(names(sprint_titles), function(pos_type) {
  group_players <- eafc_outfield %>%
    filter(Position_Type == pos_type)

  ggplot(group_players, aes(x = Sprint, y = Overall, fill = Overall)) +
    geom_col() +
    geom_vline(
      xintercept = mean(group_players$Sprint),
      linetype = "dotted", color = "black", linewidth = 1
    ) +
    scale_fill_viridis_c() +
    theme_bw() +
    labs(title = sprint_titles[[pos_type]])
})

gridExtra::grid.arrange(grobs = sprint_panels, ncol = 1)

Stamina and Overall Value

# Overall rating against Stamina, one panel per position group, stacked in the
# order attackers, midfielders, defenders. The dotted vertical line in each
# panel marks that group's mean Stamina, computed from the same rows that are
# plotted (rather than a fixed constant shared by every panel).
stamina_titles <- c(
  ATT = "Attacking Players Overall vs Stamina",
  MID = "Midfielders Players Overall vs Stamina",
  DEF = "Defenders Players Overall vs Stamina"
)

stamina_panels <- lapply(names(stamina_titles), function(pos_type) {
  group_players <- eafc_outfield %>%
    filter(Position_Type == pos_type)

  ggplot(group_players, aes(x = Stamina, y = Overall, fill = Overall)) +
    geom_col() +
    geom_vline(
      xintercept = mean(group_players$Stamina),
      linetype = "dotted", color = "black", linewidth = 1
    ) +
    scale_fill_viridis_c() +
    theme_bw() +
    labs(title = stamina_titles[[pos_type]])
})

gridExtra::grid.arrange(grobs = stamina_panels, ncol = 1)

Top Strength-Speed-Stamina in Clubs and Players

# SSS metric: the sum of the mean Strength, mean Sprint and mean Stamina.

# SSS per club, averaged over the club's outfield players.
SSS_club <- eafc_outfield %>%
  group_by(Club) %>%
  summarise(
    avg_strength = mean(Strength),
    avg_sprint = mean(Sprint),
    avg_stamina = mean(Stamina)
  ) %>%
  mutate(SSS = avg_strength + avg_sprint + avg_stamina) %>%
  select(Club, SSS)

# SSS per player name, rounded to two decimals.
SSS_player <- eafc_outfield %>%
  group_by(Name) %>%
  summarise(
    avg_strength = mean(Strength),
    avg_sprint = mean(Sprint),
    avg_stamina = mean(Stamina)
  ) %>%
  mutate(SSS = round(avg_strength + avg_sprint + avg_stamina, 2)) %>%
  select(Name, SSS)

Club SSS

# Bar chart of the 15 clubs with the highest SSS, bars in ascending order and
# labelled with the SSS value. The title states 15 to match the number of
# rows kept by slice().
SSS_club %>%
  arrange(desc(SSS)) %>%
  slice(1:15) %>%
  ggplot(aes(x = fct_reorder(Club, SSS), y = SSS, fill = SSS)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90)) +
  geom_text(aes(label = round(SSS, 2)), vjust = -0.5, size = 2) +
  scale_fill_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Top 15 Clubs with the highest SSS", x = "Club")

SSS for Player

# Bar chart of the 15 players with the highest SSS, bars in ascending order
# and labelled with the SSS value. The title states 15 to match the number of
# rows kept by slice().
SSS_player %>%
  arrange(desc(SSS)) %>%
  slice(1:15) %>%
  ggplot(aes(x = fct_reorder(Name, SSS), y = SSS, fill = SSS)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90)) +
  geom_text(aes(label = round(SSS, 2)), vjust = -0.5, size = 2) +
  scale_fill_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Top 15 Players with the highest SSS", x = "Player")

# (Re)build the per-player SSS table: for each Name, the sum of the mean
# Strength, mean Sprint and mean Stamina, rounded to two decimals.
SSS_player <- eafc_outfield %>%
  group_by(Name) %>%
  summarise(SSS = round(mean(Strength) + mean(Sprint) + mean(Stamina), 2))


# Scatter plot of Overall against each row's own SSS (Strength + Sprint +
# Stamina) with one linear-regression trend line, rendered interactively.
SSS_Overall <- eafc_outfield %>%
  mutate(SSS = round(Strength + Sprint + Stamina, 2)) %>%
  ggplot(aes(x = SSS, y = Overall)) +
  # Colour is mapped on the point layer only. As a plot-wide aesthetic it was
  # inherited by stat_smooth(), which cannot use a continuous colour and
  # dropped it with a warning.
  geom_point(aes(color = Overall)) +
  scale_color_viridis_c(option = "turbo", direction = -1) +
  theme_bw() +
  # Stating the formula explicitly avoids the "using formula = 'y ~ x'" message.
  stat_smooth(method = "lm", formula = y ~ x) +
  labs(title = "Overall Rating vs SSS score")

ggplotly(SSS_Overall)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: colour
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Technical Ability

Set Piece Specialists

# Interactive scatter of Free against Penalties for the 15 rows with the
# highest Free value, coloured by Overall. `label = Name` is kept in the plot
# aesthetics so the name is available to ggplotly().
set_piece <- eafc_outfield %>%
  arrange(desc(Free)) %>%
  slice_head(n = 15) %>%
  ggplot(aes(x = Free, y = Penalties, color = Overall, label = Name)) +
  # size is a fixed setting, so it goes outside aes(); inside aes() the
  # constant 5 was treated as data, which added a meaningless "size" legend
  # and did not actually draw the points at size 5.
  geom_point(size = 5) +
  scale_color_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Set Piece Specialists", x = "Free Kick")

ggplotly(set_piece)

Scoring Ability

# Interactive scatter of Finishing against Shooting for the 15 rows with the
# highest Shooting value, coloured by Overall. `label = Name` is kept in the
# plot aesthetics so the name is available to ggplotly().
scoring <- eafc_outfield %>%
  arrange(desc(Shooting)) %>%
  slice_head(n = 15) %>%
  ggplot(aes(x = Finishing, y = Shooting, color = Overall, label = Name)) +
  # size is a fixed setting, so it goes outside aes(); inside aes() the
  # constant 5 was treated as data, which added a meaningless "size" legend
  # and did not actually draw the points at size 5.
  geom_point(size = 5) +
  scale_color_viridis_c(option = "viridis", direction = -1) +
  # The x axis shows Finishing; the old "Free Kick" label was a copy-paste
  # leftover from the set-piece chart.
  labs(title = "Finishers", x = "Finishing")

ggplotly(scoring)

Overall Ratings

Player Ratings

# Labelled bar chart of the ten highest-Overall rows of eafc_outfield,
# with bars ordered by Overall.
eafc_outfield %>%
  arrange(desc(Overall)) %>%
  slice_head(n = 10) %>%
  ggplot(
    mapping = aes(
      x = fct_reorder(Name, Overall),
      y = Overall,
      fill = Overall
    )
  ) +
  geom_col() +
  geom_text(mapping = aes(label = round(Overall, 2)), vjust = -0.5, size = 3) +
  scale_fill_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Top 10 Players - Overall Rating", x = "Player") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, size = 8))

Player Ratings per Position

Attackers

# Labelled bar chart of the five highest-Overall rows whose Position_Type
# is "ATT", with bars ordered by Overall.
eafc_outfield %>%
  filter(Position_Type == "ATT") %>%
  arrange(desc(Overall)) %>%
  slice_head(n = 5) %>%
  ggplot(
    mapping = aes(
      x = fct_reorder(Name, Overall),
      y = Overall,
      fill = Overall
    )
  ) +
  geom_col() +
  geom_text(mapping = aes(label = round(Overall, 2)), vjust = -0.5, size = 3) +
  scale_fill_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Top 5 Attackers - Overall Rating", x = "Player") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5))

Midfielders

# Labelled bar chart of the five highest-Overall rows whose Position_Type
# is "MID", with bars ordered by Overall.
eafc_outfield %>%
  filter(Position_Type == "MID") %>%
  arrange(desc(Overall)) %>%
  slice_head(n = 5) %>%
  ggplot(
    mapping = aes(
      x = fct_reorder(Name, Overall),
      y = Overall,
      fill = Overall
    )
  ) +
  geom_col() +
  geom_text(mapping = aes(label = round(Overall, 2)), vjust = -0.5, size = 3) +
  scale_fill_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Top 5 Midfielders - Overall Rating", x = "Player") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5))

Defenders

# Labelled bar chart of the five highest-Overall rows whose Position_Type
# is "DEF", with bars ordered by Overall.
eafc_outfield %>%
  filter(Position_Type == "DEF") %>%
  arrange(desc(Overall)) %>%
  slice_head(n = 5) %>%
  ggplot(
    mapping = aes(
      x = fct_reorder(Name, Overall),
      y = Overall,
      fill = Overall
    )
  ) +
  geom_col() +
  geom_text(mapping = aes(label = round(Overall, 2)), vjust = -0.5, size = 3) +
  scale_fill_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Top 5 Defenders - Overall Rating", x = "Player") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5))

Club Ratings

# Labelled bar chart of the ten clubs with the highest mean Overall.
# The mean is taken per Club and shown rounded to two decimals.
eafc_outfield %>%
  group_by(Club) %>%
  summarise(Overall = mean(Overall)) %>%
  arrange(desc(Overall)) %>%
  slice_head(n = 10) %>%
  ggplot(
    mapping = aes(
      x = fct_reorder(Club, Overall),
      y = Overall,
      fill = Overall
    )
  ) +
  geom_col() +
  geom_text(mapping = aes(label = round(Overall, 2)), vjust = -0.5, size = 3) +
  scale_fill_viridis_c(option = "viridis", direction = -1) +
  labs(title = "Top 10 Clubs - Average Overall Rating", x = "Club") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, size = 8))

Correlation to Overall Rating

Correlation of Physical Traits

# Overall plus the physical attribute columns, taken from the numeric-only
# outfield table, then drawn as a labelled correlation matrix.
phys_corr <- eafc_outfield_num %>%
  select(
    "Overall", "Age", "Pace", "Physicality", "Acceleration", "Sprint",
    "Agility", "Balance", "Reactions", "Stamina", "Strength", "Aggression"
  )

ggcorr(
  phys_corr,
  nbreaks = 4,
  palette = "RdGy",
  hjust = 0.75,
  label = TRUE,
  label_size = 3,
  label_color = "white",
  size = 2
)

Correlation of Technical Traits

# Overall plus the technical attribute columns, taken from the numeric-only
# outfield table, then drawn as a labelled correlation matrix.
tech_corr <- eafc_outfield_num %>%
  select(
    "Overall", "Shooting", "Passing", "Dribbling", "Defending",
    "Finishing", "Shot", "Long", "Volleys", "Penalties",
    "Crossing", "Free", "Curve", "Ball", "Interceptions",
    "Heading", "Def", "Standing", "Sliding", "Jumping"
  )

ggcorr(
  tech_corr,
  nbreaks = 4,
  palette = "RdGy",
  hjust = 0.75,
  label = TRUE,
  label_size = 3,
  label_color = "white",
  size = 2
)

Correlation of Tactical Traits

# Overall plus the tactical attribute columns, taken from the numeric-only
# outfield table, then drawn as a labelled correlation matrix.
tact_corr <- eafc_outfield_num %>%
  select("Overall", "Positioning", "Vision", "Composure")

ggcorr(
  tact_corr,
  nbreaks = 4,
  palette = "RdGy",
  hjust = 0.75,
  label = TRUE,
  label_size = 3,
  label_color = "white",
  size = 2
)