Read data

# Read the FIFA 19 player table from CSV with readr and convert the result
# to a base data.frame, then preview the first rows.
library(readr)
fifa19 <- as.data.frame(read_csv(file = "G:/MODEL-STUDIO/data.csv"))
# Disabled: would drop the first column. Later steps select columns by
# position, so enabling this would shift those indices.
# fifa19 <- fifa18[, -1]
head(fifa19, n = 6L)
##   ...1     ID              Name Age
## 1    0 158023          L. Messi  31
## 2    1  20801 Cristiano Ronaldo  33
## 3    2 190871         Neymar Jr  26
## 4    3 193080            De Gea  27
## 5    4 192985      K. De Bruyne  27
## 6    5 183277         E. Hazard  27
##                                            Photo Nationality
## 1 https://cdn.sofifa.org/players/4/19/158023.png   Argentina
## 2  https://cdn.sofifa.org/players/4/19/20801.png    Portugal
## 3 https://cdn.sofifa.org/players/4/19/190871.png      Brazil
## 4 https://cdn.sofifa.org/players/4/19/193080.png       Spain
## 5 https://cdn.sofifa.org/players/4/19/192985.png     Belgium
## 6 https://cdn.sofifa.org/players/4/19/183277.png     Belgium
##                                  Flag Overall Potential                Club
## 1 https://cdn.sofifa.org/flags/52.png      94        94        FC Barcelona
## 2 https://cdn.sofifa.org/flags/38.png      94        94            Juventus
## 3 https://cdn.sofifa.org/flags/54.png      92        93 Paris Saint-Germain
## 4 https://cdn.sofifa.org/flags/45.png      91        93   Manchester United
## 5  https://cdn.sofifa.org/flags/7.png      91        92     Manchester City
## 6  https://cdn.sofifa.org/flags/7.png      91        91             Chelsea
##                                      Club Logo   Value  Wage Special
## 1 https://cdn.sofifa.org/teams/2/light/241.png €110.5M €565K    2202
## 2  https://cdn.sofifa.org/teams/2/light/45.png    €77M €405K    2228
## 3  https://cdn.sofifa.org/teams/2/light/73.png €118.5M €290K    2143
## 4  https://cdn.sofifa.org/teams/2/light/11.png    €72M €260K    1471
## 5  https://cdn.sofifa.org/teams/2/light/10.png   €102M €355K    2281
## 6   https://cdn.sofifa.org/teams/2/light/5.png    €93M €340K    2142
##   Preferred Foot International Reputation Weak Foot Skill Moves      Work Rate
## 1           Left                        5         4           4 Medium/ Medium
## 2          Right                        5         4           5      High/ Low
## 3          Right                        5         5           5   High/ Medium
## 4          Right                        4         3           1 Medium/ Medium
## 5          Right                        4         5           4     High/ High
## 6          Right                        4         4           4   High/ Medium
##    Body Type Real Face Position Jersey Number       Joined Loaned From
## 1      Messi       Yes       RF            10  Jul 1, 2004        <NA>
## 2 C. Ronaldo       Yes       ST             7 Jul 10, 2018        <NA>
## 3     Neymar       Yes       LW            10  Aug 3, 2017        <NA>
## 4       Lean       Yes       GK             1  Jul 1, 2011        <NA>
## 5     Normal       Yes      RCM             7 Aug 30, 2015        <NA>
## 6     Normal       Yes       LF            10  Jul 1, 2012        <NA>
##   Contract Valid Until Height Weight   LS   ST   RS   LW   LF   CF   RF   RW
## 1                 2021    5'7 159lbs 88+2 88+2 88+2 92+2 93+2 93+2 93+2 92+2
## 2                 2022    6'2 183lbs 91+3 91+3 91+3 89+3 90+3 90+3 90+3 89+3
## 3                 2022    5'9 150lbs 84+3 84+3 84+3 89+3 89+3 89+3 89+3 89+3
## 4                 2020    6'4 168lbs <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 5                 2023   5'11 154lbs 82+3 82+3 82+3 87+3 87+3 87+3 87+3 87+3
## 6                 2020    5'8 163lbs 83+3 83+3 83+3 89+3 88+3 88+3 88+3 89+3
##    LAM  CAM  RAM   LM  LCM   CM  RCM   RM  LWB  LDM  CDM  RDM  RWB   LB  LCB
## 1 93+2 93+2 93+2 91+2 84+2 84+2 84+2 91+2 64+2 61+2 61+2 61+2 64+2 59+2 47+2
## 2 88+3 88+3 88+3 88+3 81+3 81+3 81+3 88+3 65+3 61+3 61+3 61+3 65+3 61+3 53+3
## 3 89+3 89+3 89+3 88+3 81+3 81+3 81+3 88+3 65+3 60+3 60+3 60+3 65+3 60+3 47+3
## 4 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 5 88+3 88+3 88+3 88+3 87+3 87+3 87+3 88+3 77+3 77+3 77+3 77+3 77+3 73+3 66+3
## 6 89+3 89+3 89+3 89+3 82+3 82+3 82+3 89+3 66+3 63+3 63+3 63+3 66+3 60+3 49+3
##     CB  RCB   RB Crossing Finishing HeadingAccuracy ShortPassing Volleys
## 1 47+2 47+2 59+2       84        95              70           90      86
## 2 53+3 53+3 61+3       84        94              89           81      87
## 3 47+3 47+3 60+3       79        87              62           84      84
## 4 <NA> <NA> <NA>       17        13              21           50      13
## 5 66+3 66+3 73+3       93        82              55           92      82
## 6 49+3 49+3 60+3       81        84              61           89      80
##   Dribbling Curve FKAccuracy LongPassing BallControl Acceleration SprintSpeed
## 1        97    93         94          87          96           91          86
## 2        88    81         76          77          94           89          91
## 3        96    88         87          78          95           94          90
## 4        18    21         19          51          42           57          58
## 5        86    85         83          91          91           78          76
## 6        95    83         79          83          94           94          88
##   Agility Reactions Balance ShotPower Jumping Stamina Strength LongShots
## 1      91        95      95        85      68      72       59        94
## 2      87        96      70        95      95      88       79        93
## 3      96        94      84        80      61      81       49        82
## 4      60        90      43        31      67      43       64        12
## 5      79        91      77        91      63      90       75        91
## 6      95        90      94        82      56      83       66        80
##   Aggression Interceptions Positioning Vision Penalties Composure Marking
## 1         48            22          94     94        75        96      33
## 2         63            29          95     82        85        95      28
## 3         56            36          89     87        81        94      27
## 4         38            30          12     68        40        68      15
## 5         76            61          87     94        79        88      68
## 6         54            41          87     89        86        91      34
##   StandingTackle SlidingTackle GKDiving GKHandling GKKicking GKPositioning
## 1             28            26        6         11        15            14
## 2             31            23        7         11        15            14
## 3             24            33        9          9        15            15
## 4             21            13       90         85        87            88
## 5             58            51       15         13         5            10
## 6             27            22       11         12         6             8
##   GKReflexes Release Clause
## 1          8        €226.5M
## 2         11        €127.1M
## 3         11        €228.1M
## 4         94        €138.6M
## 5         13        €196.4M
## 6          8        €172.1M

Data Preprocessing

Transform the `Value` column into a standard numeric value.

# Convert value strings such as "110.5M", "565K" or "0" (currency symbol
# already removed) into plain numbers.
#
# x: character vector (anything else is coerced with as.character()).
# Returns an unnamed numeric vector of the same length as x. A trailing "M"
# multiplies by 1e6, a trailing "K" by 1e3; anything else is passed to
# as.numeric() unchanged. Missing inputs yield NA instead of raising an
# error, which the element-wise `if (unit == "M")` version did.
parse_value_string <- function(x) {
  x <- as.character(x)
  last <- nchar(x)
  unit <- substr(x, last, last)
  # %in% never returns NA, so missing or empty strings fall through safely.
  has_unit <- unit %in% c("M", "K")
  multiplier <- ifelse(unit %in% "M", 1e6, ifelse(unit %in% "K", 1e3, 1))
  digits <- ifelse(has_unit, substr(x, 1, last - 1), x)
  as.numeric(digits) * multiplier
}

# Drop the first character of Value (the currency symbol in the raw data),
# then store the parsed amount in a new numeric column.
fifa19$Value <- substr(fifa19$Value, 2, 200)
fifa19$ValueNum <- parse_value_string(fifa19$Value)

# Use syntactically valid, de-duplicated player names as row names so that
# single players can be looked up by name later on.
rownames(fifa19) <- make.names(fifa19$Name, unique = TRUE)

Feature selection

Let's select only features related to player characteristics.

# Keep only player-characteristic columns, selected by position. Following
# the column order printed by head() above, these are: Age (4), Overall (8),
# Special through Skill Moves (14:18), the skill ratings Crossing through
# GKReflexes (55:88) and the appended ValueNum column (90).
fifa19_selected <- fifa19[, c(4, 8, 14:18, 55:88, 90)]

# Store the preferred foot as a factor rather than as text.
fifa19_selected[["Preferred Foot"]] <- factor(
  fifa19_selected[["Preferred Foot"]]
)

Create a gbm model

Let's use the gbm library to create a gbm model with 250 trees, each 3 levels deep.

## Loaded gbm 2.1.8
## Distribution not specified, assuming gaussian ...

Create a DALEX explainer

Let's wrap the gbm model in a DALEX explainer.

## Welcome to DALEX (version: 2.3.0).
## Find examples and detailed introduction at: http://ema.drwhy.ai/
## Preparation of a new explainer is initiated
##   -> model label       :  gbm  (  default  )
##   -> data              :  18207  rows  42  cols 
##   -> target variable   :  18207  values 
##   -> predict function  :  function(m, x) predict(m, x, n.trees = 250)^2 
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package gbm , ver. 2.1.8 , task regression (  default  ) 
##   -> predicted values  :  numerical, min =  226416.2 , mean =  3.52238e+13 , max =  1.206726e+16  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -1.136101e+15 , mean =  1.889204e+12 , max =  1.974986e+15  
##   A new explainer has been created! 

Feature Importance explainer

Calculate the Feature Importance explainer.

# Compute feature importance for the gbm explainer and plot it, passing
# max_vars = 12 to the plot method.
library(ingredients)
fifa_feat <- ingredients::feature_importance(
  fifa_gbm_exp
)
plot(
  fifa_feat,
  max_vars = 12
)

Partial Dependency explainer

Calculate Partial Dependency explainer.

# Partial dependency profile of the gbm explainer for the Age variable.
fifa19_pd <- ingredients::partial_dependency(
  fifa_gbm_exp,
  variables = "Age"
)
plot(fifa19_pd)

Ceteris Paribus explainer

Calculate Ceteris Paribus explainer.

# Ceteris paribus profile for the single player stored under the row name
# "P..Gulácsi": only Age is varied, over a grid from 18 to 45 in steps of 0.1.
fifa19_cp_pg <- ingredients::ceteris_paribus(
  fifa_gbm_exp,
  new_observation = fifa19_selected["P..Gulácsi", ],
  variables = "Age",
  variable_splits = list(Age = seq(18, 45, 0.1))
)
plot(fifa19_cp_pg)

Break Down explainer

Calculate Break Down explainer.

# Break-down explanation of the gbm prediction for the player stored under
# the row name "P..Gulácsi", followed by its default plot.
library(iBreakDown)
fifa_pg_gbm <- break_down(
  fifa_gbm_exp,
  new_observation = fifa19_selected["P..Gulácsi", ]
)
plot(fifa_pg_gbm)

# Give the break-down object a descriptive label before plotting it again.
# Assignment uses `<-` rather than `=`, the conventional R assignment operator.
fifa_pg_gbm$label <- "Break Down for Péter Gulácsi (GBM model)"
library("ggplot2")
library("scales")

# Re-plot with rounded contributions (digits = 0) and a customised y axis:
# values are labelled with a trailing euro sign and no prefix, the axis is
# limited to 2e6-13e6 and ticks are placed at 2.5e6, 5e6, 7.5e6 and 10e6.
plot(fifa_pg_gbm, digits = 0) +
  scale_y_continuous(
    labels = dollar_format(suffix = "€", prefix = ""),
    name = "Estimated value",
    limits = 1000000 * c(2, 13),
    breaks = 1000000 * c(2.5, 5, 7.5, 10)
  )