library("readr")
# Read the FIFA 19 player table with readr, then convert the result to a
# base data.frame.
fifa19 <- read_csv("G:/MODEL-STUDIO/data.csv")
fifa19 <- as.data.frame(fifa19)
# Disabled step kept from the original script; it refers to `fifa18`, which is
# not defined in the visible code.
#fifa19 <- fifa18[,-1]
# Preview the first rows of the loaded table.
head(fifa19)
## ...1 ID Name Age
## 1 0 158023 L. Messi 31
## 2 1 20801 Cristiano Ronaldo 33
## 3 2 190871 Neymar Jr 26
## 4 3 193080 De Gea 27
## 5 4 192985 K. De Bruyne 27
## 6 5 183277 E. Hazard 27
## Photo Nationality
## 1 https://cdn.sofifa.org/players/4/19/158023.png Argentina
## 2 https://cdn.sofifa.org/players/4/19/20801.png Portugal
## 3 https://cdn.sofifa.org/players/4/19/190871.png Brazil
## 4 https://cdn.sofifa.org/players/4/19/193080.png Spain
## 5 https://cdn.sofifa.org/players/4/19/192985.png Belgium
## 6 https://cdn.sofifa.org/players/4/19/183277.png Belgium
## Flag Overall Potential Club
## 1 https://cdn.sofifa.org/flags/52.png 94 94 FC Barcelona
## 2 https://cdn.sofifa.org/flags/38.png 94 94 Juventus
## 3 https://cdn.sofifa.org/flags/54.png 92 93 Paris Saint-Germain
## 4 https://cdn.sofifa.org/flags/45.png 91 93 Manchester United
## 5 https://cdn.sofifa.org/flags/7.png 91 92 Manchester City
## 6 https://cdn.sofifa.org/flags/7.png 91 91 Chelsea
## Club Logo Value Wage Special
## 1 https://cdn.sofifa.org/teams/2/light/241.png \200110.5M \200565K 2202
## 2 https://cdn.sofifa.org/teams/2/light/45.png \20077M \200405K 2228
## 3 https://cdn.sofifa.org/teams/2/light/73.png \200118.5M \200290K 2143
## 4 https://cdn.sofifa.org/teams/2/light/11.png \20072M \200260K 1471
## 5 https://cdn.sofifa.org/teams/2/light/10.png \200102M \200355K 2281
## 6 https://cdn.sofifa.org/teams/2/light/5.png \20093M \200340K 2142
## Preferred Foot International Reputation Weak Foot Skill Moves Work Rate
## 1 Left 5 4 4 Medium/ Medium
## 2 Right 5 4 5 High/ Low
## 3 Right 5 5 5 High/ Medium
## 4 Right 4 3 1 Medium/ Medium
## 5 Right 4 5 4 High/ High
## 6 Right 4 4 4 High/ Medium
## Body Type Real Face Position Jersey Number Joined Loaned From
## 1 Messi Yes RF 10 Jul 1, 2004 <NA>
## 2 C. Ronaldo Yes ST 7 Jul 10, 2018 <NA>
## 3 Neymar Yes LW 10 Aug 3, 2017 <NA>
## 4 Lean Yes GK 1 Jul 1, 2011 <NA>
## 5 Normal Yes RCM 7 Aug 30, 2015 <NA>
## 6 Normal Yes LF 10 Jul 1, 2012 <NA>
## Contract Valid Until Height Weight LS ST RS LW LF CF RF RW
## 1 2021 5'7 159lbs 88+2 88+2 88+2 92+2 93+2 93+2 93+2 92+2
## 2 2022 6'2 183lbs 91+3 91+3 91+3 89+3 90+3 90+3 90+3 89+3
## 3 2022 5'9 150lbs 84+3 84+3 84+3 89+3 89+3 89+3 89+3 89+3
## 4 2020 6'4 168lbs <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 5 2023 5'11 154lbs 82+3 82+3 82+3 87+3 87+3 87+3 87+3 87+3
## 6 2020 5'8 163lbs 83+3 83+3 83+3 89+3 88+3 88+3 88+3 89+3
## LAM CAM RAM LM LCM CM RCM RM LWB LDM CDM RDM RWB LB LCB
## 1 93+2 93+2 93+2 91+2 84+2 84+2 84+2 91+2 64+2 61+2 61+2 61+2 64+2 59+2 47+2
## 2 88+3 88+3 88+3 88+3 81+3 81+3 81+3 88+3 65+3 61+3 61+3 61+3 65+3 61+3 53+3
## 3 89+3 89+3 89+3 88+3 81+3 81+3 81+3 88+3 65+3 60+3 60+3 60+3 65+3 60+3 47+3
## 4 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 5 88+3 88+3 88+3 88+3 87+3 87+3 87+3 88+3 77+3 77+3 77+3 77+3 77+3 73+3 66+3
## 6 89+3 89+3 89+3 89+3 82+3 82+3 82+3 89+3 66+3 63+3 63+3 63+3 66+3 60+3 49+3
## CB RCB RB Crossing Finishing HeadingAccuracy ShortPassing Volleys
## 1 47+2 47+2 59+2 84 95 70 90 86
## 2 53+3 53+3 61+3 84 94 89 81 87
## 3 47+3 47+3 60+3 79 87 62 84 84
## 4 <NA> <NA> <NA> 17 13 21 50 13
## 5 66+3 66+3 73+3 93 82 55 92 82
## 6 49+3 49+3 60+3 81 84 61 89 80
## Dribbling Curve FKAccuracy LongPassing BallControl Acceleration SprintSpeed
## 1 97 93 94 87 96 91 86
## 2 88 81 76 77 94 89 91
## 3 96 88 87 78 95 94 90
## 4 18 21 19 51 42 57 58
## 5 86 85 83 91 91 78 76
## 6 95 83 79 83 94 94 88
## Agility Reactions Balance ShotPower Jumping Stamina Strength LongShots
## 1 91 95 95 85 68 72 59 94
## 2 87 96 70 95 95 88 79 93
## 3 96 94 84 80 61 81 49 82
## 4 60 90 43 31 67 43 64 12
## 5 79 91 77 91 63 90 75 91
## 6 95 90 94 82 56 83 66 80
## Aggression Interceptions Positioning Vision Penalties Composure Marking
## 1 48 22 94 94 75 96 33
## 2 63 29 95 82 85 95 28
## 3 56 36 89 87 81 94 27
## 4 38 30 12 68 40 68 15
## 5 76 61 87 94 79 88 68
## 6 54 41 87 89 86 91 34
## StandingTackle SlidingTackle GKDiving GKHandling GKKicking GKPositioning
## 1 28 26 6 11 15 14
## 2 31 23 7 11 15 14
## 3 24 33 9 9 15 15
## 4 21 13 90 85 87 88
## 5 58 51 15 13 5 10
## 6 27 22 11 12 6 8
## GKReflexes Release Clause
## 1 8 \200226.5M
## 2 11 \200127.1M
## 3 11 \200228.1M
## 4 94 \200138.6M
## 5 13 \200196.4M
## 6 8 \200172.1M
Transform Value into a standard numeric.
# Convert the textual market value (e.g. "110.5M", "565K" or "0" once the
# leading currency symbol is removed) into a plain number.

# Drop the first character (the currency symbol). `Value` itself keeps the
# stripped text, as before.
fifa19$Value <- substr(fifa19$Value, 2, 200)

value_txt <- as.character(fifa19$Value)
value_len <- nchar(value_txt)
value_unit <- substr(value_txt, value_len, value_len)

# Multiplier implied by the trailing unit letter: "M" -> 1e6, "K" -> 1e3.
# Values without a recognised unit are taken as they are.
value_scale <- rep(1, length(value_txt))
value_scale[value_unit %in% "M"] <- 1e6
value_scale[value_unit %in% "K"] <- 1e3

# Strip the unit letter only where one was recognised. `%in%` never returns
# NA, so a missing Value ends up as NA in ValueNum instead of aborting the
# conversion inside a scalar `if ()`.
has_unit <- value_unit %in% c("M", "K")
value_digits <- ifelse(
  has_unit,
  substr(value_txt, 1, value_len - 1),
  value_txt
)
fifa19$ValueNum <- as.numeric(value_digits) * value_scale

# Use syntactically valid, de-duplicated player names as row names so that
# single players can be looked up by name.
rownames(fifa19) <- make.names(fifa19$Name, unique = TRUE)
Let's select only features related to player characteristics.
# Pick the player-characteristic columns by name rather than by position, so
# the selection does not silently shift if the column layout of the input
# changes (for example if the leading index column is dropped).
fifa19_cols <- names(fifa19)

# The skill ratings form one contiguous run of columns, from "Crossing"
# through "GKReflexes".
skill_span <- match(c("Crossing", "GKReflexes"), fifa19_cols)
stopifnot(!anyNA(skill_span))

selected_cols <- c(
  "Age", "Overall", "Special", "Preferred Foot", "International Reputation",
  "Weak Foot", "Skill Moves",
  fifa19_cols[skill_span[1]:skill_span[2]],
  "ValueNum"
)
stopifnot(all(selected_cols %in% fifa19_cols))

fifa19_selected <- fifa19[, selected_cols]
# Treat the preferred foot as a categorical variable.
fifa19_selected$`Preferred Foot` <- factor(fifa19_selected$`Preferred Foot`)
Let's use the gbm library to create a gbm model with 250 trees, each 3 levels deep.
## Loaded gbm 2.1.8
## Distribution not specified, assuming gaussian ...
Let's wrap gbm model into a DALEX explainer.
## Welcome to DALEX (version: 2.3.0).
## Find examples and detailed introduction at: http://ema.drwhy.ai/
## Preparation of a new explainer is initiated
## -> model label : gbm ( default )
## -> data : 18207 rows 42 cols
## -> target variable : 18207 values
## -> predict function : function(m, x) predict(m, x, n.trees = 250)^2
## -> predicted values : No value for predict function target column. ( default )
## -> model_info : package gbm , ver. 2.1.8 , task regression ( default )
## -> predicted values : numerical, min = 226416.2 , mean = 3.52238e+13 , max = 1.206726e+16
## -> residual function : difference between y and yhat ( default )
## -> residuals : numerical, min = -1.136101e+15 , mean = 1.889204e+12 , max = 1.974986e+15
## A new explainer has been created!
Calculate Feature Importance explainer.
library("ingredients")

# Feature importance computed on the explainer object.
fifa_feat <- ingredients::feature_importance(
  fifa_gbm_exp
)

# Plot with `max_vars = 12`.
plot(
  fifa_feat,
  max_vars = 12
)
Calculate Partial Dependency explainer.
# Partial-dependency profile of the explainer for the variable "Age".
fifa19_pd <- ingredients::partial_dependency(
  fifa_gbm_exp,
  variables = "Age"
)
plot(fifa19_pd)
Calculate Ceteris Paribus explainer.
# Ceteris-paribus profile for the single row named "P..Gulácsi": only Age is
# varied, over a grid from 18 to 45 in steps of 0.1.
fifa19_cp_pg <- ingredients::ceteris_paribus(
  fifa_gbm_exp,
  new_observation = fifa19_selected["P..Gulácsi", ],
  variables = "Age",
  variable_splits = list(Age = seq(18, 45, 0.1))
)
plot(fifa19_cp_pg)
Calculate Break Down explainer.
library("iBreakDown")

# Break-down explanation for the row named "P..Gulácsi".
fifa_pg_gbm <- break_down(
  fifa_gbm_exp,
  new_observation = fifa19_selected["P..Gulácsi", ]
)
plot(fifa_pg_gbm)

# Replace the label stored on the result before drawing the second plot.
fifa_pg_gbm$label <- "Break Down for Péter Gulácsi (GBM model)"

library("ggplot2")
library("scales")

# Same plot with a custom y axis: euro suffix on the labels, a fixed range of
# 2 to 13 million, and breaks at 2.5, 5, 7.5 and 10 million.
plot(fifa_pg_gbm, digits = 0) +
  scale_y_continuous(
    name = "Estimated value",
    labels = dollar_format(suffix = "€", prefix = ""),
    limits = 1000000 * c(2, 13),
    breaks = 1000000 * c(2.5, 5, 7.5, 10)
  )