Read the data from the kaggle website https://www.kaggle.com/karangadiya/fifa19.
library("readr")
fifa19 <- as.data.frame(read_csv("G:/MODEL-STUDIO/data.csv"))
#fifa19 <- fifa18[,-1]
Transform Value into a standard numeric.
fifa19$Value <- substr(fifa19$Value,2,200)
fifa19$ValueNum <- sapply(as.character(fifa19$Value), function(x) {
unit <- substr(x, nchar(x), nchar(x))
if (unit == "M") return (as.numeric(substr(x, 1, nchar(x)-1)) * 1000000)
if (unit == "K") return (as.numeric(substr(x, 1, nchar(x)-1)) * 1000)
as.numeric(x)
})
rownames(fifa19) <- make.names(fifa19$Name, unique = TRUE)
Let’s select only features related to player characteristics.
fifa19_selected <- fifa19[ ,c(4,8,14:18,55:88,90)]
fifa19_selected$`Preferred Foot` <- factor(fifa19_selected$`Preferred Foot`)
Let’s use gbm library to create a gbm model with 250 trees 3 levels deep.
set.seed(1313)
library("gbm")
fifa_gbm <- gbm(ValueNum~.-Overall, data = fifa19_selected, n.trees = 250, interaction.depth = 3)
## Distribution not specified, assuming gaussian ...
Let’s wrap gbm model into a DALEX explainer.
library("DALEX")
fifa_gbm_exp <- explain(fifa_gbm,
data = fifa19_selected,
y = fifa19_selected$ValueNum^2,
predict_function = function(m,x)
predict(m, x, n.trees = 250)^2)
## Preparation of a new explainer is initiated
## -> model label : gbm ([33mdefault[39m)
## -> data : 18207 rows 42 cols
## -> target variable : 18207 values
## -> predict function : function(m, x) predict(m, x, n.trees = 250)^2
## -> predicted values : numerical, min = 226416.2 , mean = 3.52238e+13 , max = 1.206726e+16
## -> residual function : difference between y and yhat ([33mdefault[39m)
## -> residuals : numerical, min = -1.136101e+15 , mean = 1.889204e+12 , max = 1.974986e+15
## [32mA new explainer has been created![39m
Calculate Feature Importnace explainer.
library("ingredients")
fifa_feat <- ingredients::feature_importance(fifa_gbm_exp)
plot(fifa_feat, max_vars = 12)
Calculate Partial Dependency explainer.
fifa19_pd <- ingredients::partial_dependency(fifa_gbm_exp, variables = "Age")
plot(fifa19_pd)
Calculate Ceteris Paribus explainer.
fifa19_cp_pg <- ingredients::ceteris_paribus(fifa_gbm_exp, new_observation = fifa19_selected["P..Gulácsi",], variables = "Age", variable_splits = list(Age = seq(18,45,0.1)))
plot(fifa19_cp_pg)
Calculate Break Down explainer.
library("iBreakDown")
fifa_pg_gbm <- break_down(fifa_gbm_exp, new_observation = fifa19_selected["P..Gulácsi",])
plot(fifa_pg_gbm)
fifa_pg_gbm$label = "Break Down for Péter Gulácsi (GBM model)"
library("ggplot2")
library("scales")
plot(fifa_pg_gbm, digits = 0) +
scale_y_continuous(labels = dollar_format(suffix = "€", prefix = ""), name = "Estimated value", limits = 1000000*c(2,13), breaks = 1000000*c(2.5,5,7.5,10))
Calculate modelStudio dashboard.
library("modelStudio")
options(
parallelMap.default.mode = "socket",
parallelMap.default.cpus = 4,
parallelMap.default.show.info = FALSE
)
nationality <- c("England", "France", "Argentina", "Spain", "Germany", "Sweden", "Netherlands", "Japan", "Poland", "United States", "Mexico", "Denmark", "Saudi Arabia", "Norway", "Argentina", "Portugal", "Brasil")
library(dplyr)
fifa19 %>%
filter(Nationality %in% nationality) %>%
group_by(Nationality) %>%
arrange(-ValueNum) %>%
top_n(2) %>%
select(Name, Nationality) %>%
as.data.frame() -> players
selected <- c("Cristiano.Ronaldo", "L..Messi","R..Lewandowski", "W..Szczęsny", "P..Gulácsi","A..Szalai", "Neymar.Jr",
"P..Dybala", "H..Kane", "K..Mbappé", "A..Griezmann",
"T..Kroos", "Isco", "C..Eriksen", "De.Gea",
"L..Sané", "R..Sterling", "V..van.Dijk", "M..Depay",
"E..Forsberg", "H..Lozano", "T..Delaney", "A..Christensen", "J..Corona",
"C..Pulisic", "S..Kagawa", "M..Elyounoussi", "V..Lindelöf",
"T..Inui", "J..Brooks", "S..Nakajima", "R..Jarstein", "S..Al.Faraj",
"S..Al.Dawsari", "Bernardo.Silva")
names_sel <- c("Cristiano Ronaldo (PT)", "Lionel Messi (AR)", "Robert Lewandowski (PL)", "Wojciech Szczęsny (PL)", "Péter Gulácsi (HU)","Ádám Szalai (HU)", "Neymar Jr (BR)",
"Paulo Dybala (AR)", "Harry Kane (UK)", "Kylian Mbappé (FR)", "Antoine Griezmann (FR)", "Toni Kroos (DE)", "Isco (ES)", "Christian Eriksen (DK)",
"De Gea (ES)", "Leroy Sané (DE)", "Raheem Sterling (UK)", "Virgil van Dijk (NL)", "Memphis Depay (NL)", "Emil Forsberg (SE)", "Hirving Lozano (MX)", "Thomas Delaney (DK)", "Andreas Christensen (DK)", "Jesús Corona (MX)", "Christian Pulisic (US)", "Shinji Kagawa (JP)", "Mohamed Elyounoussi (NO)", "Victor Lindelöf (SE)", "Takashi Inui (JP)", "John Brooks (US)", "Shoya Nakajima (JP)", "Rune Almenning Jarstein (NO)", "Salman Al Faraj (SA)", "Salem Al Dawsari (SA)", "Bernardo Silva (PT)")
fifa_selected6 <- fifa19_selected[selected, ]
rownames(fifa_selected6) <- names_sel
fifa_selected6 <- fifa_selected6[c(1,35,2,8,3:7,9:34),]
modelStudio(fifa_gbm_exp,
new_observation = fifa_selected6,
B = 5, N = 300,
parallel = TRUE,
options = modelStudioOptions(margin_left = 125, margin_ytitle = 90),
digits = 0)