Analisa data fifa, data berisi pemain bola internasional, pada study kali ini, akan diambil sample pemain Christiano Ronaldo dan Lewandowski. Analisa detail dua pemain tsb dapat diikuti dengan model statistic di bawah ini:
head(fifa,2)
## nationality overall potential wage_eur value_eur age
## L. Messi Argentina 94 94 565000 95500000 32
## Cristiano Ronaldo Portugal 93 93 405000 58500000 34
## height_cm weight_kg attacking_crossing attacking_finishing
## L. Messi 170 72 88 95
## Cristiano Ronaldo 187 83 84 94
## attacking_heading_accuracy attacking_short_passing
## L. Messi 70 92
## Cristiano Ronaldo 89 83
## attacking_volleys skill_dribbling skill_curve
## L. Messi 88 97 93
## Cristiano Ronaldo 87 89 81
## skill_fk_accuracy skill_long_passing skill_ball_control
## L. Messi 94 92 96
## Cristiano Ronaldo 76 77 92
## movement_acceleration movement_sprint_speed movement_agility
## L. Messi 91 84 93
## Cristiano Ronaldo 89 91 87
## movement_reactions movement_balance power_shot_power
## L. Messi 95 95 86
## Cristiano Ronaldo 96 71 95
## power_jumping power_stamina power_strength power_long_shots
## L. Messi 68 75 68 94
## Cristiano Ronaldo 95 85 78 93
## mentality_aggression mentality_interceptions
## L. Messi 48 40
## Cristiano Ronaldo 63 29
## mentality_positioning mentality_vision mentality_penalties
## L. Messi 94 94 75
## Cristiano Ronaldo 95 82 85
## mentality_composure defending_marking
## L. Messi 96 33
## Cristiano Ronaldo 95 28
## defending_standing_tackle defending_sliding_tackle
## L. Messi 37 26
## Cristiano Ronaldo 32 24
## goalkeeping_diving goalkeeping_handling goalkeeping_kicking
## L. Messi 6 11 15
## Cristiano Ronaldo 7 11 15
## goalkeeping_positioning goalkeeping_reflexes
## L. Messi 14 8
## Cristiano Ronaldo 14 11
Model assembly
fifa$LogValue <- log10(fifa$value_eur)
fifa_small <- fifa[,-c(1, 2, 3, 4, 6, 7)]
library(gbm)
## Loaded gbm 2.1.8
fifa_gbm_deep <- gbm(LogValue~., data = fifa_small, n.trees = 100,
interaction.depth = 4, distribution = "gaussian")
fifa_gbm_exp_deep <- DALEX::explain(fifa_gbm_deep,
data = fifa_small, y = 10^fifa_small$LogValue,
predict_function = function(m,x) 10^predict(m, x, n.trees = 100),
label = "GBM deep")
## Preparation of a new explainer is initiated
## -> model label : GBM deep
## -> data : 5000 rows 37 cols
## -> target variable : 5000 values
## -> predict function : function(m, x) 10^predict(m, x, n.trees = 100)
## -> predicted values : numerical, min = 260647.3 , mean = 7467100 , max = 87783729
## -> model_info : package gbm , ver. 2.1.8 , task regression ( [33m default [39m )
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -10746533 , mean = 6186.865 , max = 17716271
## [32m A new explainer has been created! [39m
Model audit
model_performance(fifa_gbm_exp_deep)#ok
## Measures for: regression
## mse : 211013297609
## rmse : 459361.8
## r2 : 0.9973163
## mad : 10713.33
##
## Residuals:
## 0% 10% 20% 30% 40%
## -1.074653e+07 -4.532869e+04 -1.622123e+04 -2.864477e+03 4.405487e+02
## 50% 60% 70% 80% 90%
## 8.658074e+02 4.941751e+03 9.663550e+03 2.242535e+04 4.710810e+04
## 100%
## 1.771627e+07
fifa_md_gbm_deep <- model_diagnostics(fifa_gbm_exp_deep)#ok
plot(fifa_md_gbm_deep,
variable = "y", yvariable = "y_hat") +
scale_x_continuous("Value in Euro", trans = "identity") +
scale_y_continuous("Predicted value in Euro", trans = "identity") +
geom_abline(slope = 1) +
ggtitle("Predicted and observed player's values", "")#ok
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
fifa["Cristiano Ronaldo",]
## nationality overall potential wage_eur value_eur age
## Cristiano Ronaldo Portugal 93 93 405000 58500000 34
## height_cm weight_kg attacking_crossing attacking_finishing
## Cristiano Ronaldo 187 83 84 94
## attacking_heading_accuracy attacking_short_passing
## Cristiano Ronaldo 89 83
## attacking_volleys skill_dribbling skill_curve
## Cristiano Ronaldo 87 89 81
## skill_fk_accuracy skill_long_passing skill_ball_control
## Cristiano Ronaldo 76 77 92
## movement_acceleration movement_sprint_speed movement_agility
## Cristiano Ronaldo 89 91 87
## movement_reactions movement_balance power_shot_power
## Cristiano Ronaldo 96 71 95
## power_jumping power_stamina power_strength power_long_shots
## Cristiano Ronaldo 95 85 78 93
## mentality_aggression mentality_interceptions
## Cristiano Ronaldo 63 29
## mentality_positioning mentality_vision mentality_penalties
## Cristiano Ronaldo 95 82 85
## mentality_composure defending_marking
## Cristiano Ronaldo 95 28
## defending_standing_tackle defending_sliding_tackle
## Cristiano Ronaldo 32 24
## goalkeeping_diving goalkeeping_handling goalkeeping_kicking
## Cristiano Ronaldo 7 11 15
## goalkeeping_positioning goalkeeping_reflexes LogValue
## Cristiano Ronaldo 14 11 7.767156
rfifa_bd_gbm <- predict_parts(fifa_gbm_exp_deep,
new_observation = fifa["Cristiano Ronaldo",],
type = "break_down")#ok
plot(rfifa_bd_gbm) +
scale_y_continuous("Predicted value in Euro")+
ggtitle("Break-down plot for Cristiano Ronaldo","") #ok
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
Prediction
rfifa_shap_gbm <- predict_parts(fifa_gbm_exp_deep,
new_observation = fifa["Cristiano Ronaldo",],
type = "shap")
plot(rfifa_shap_gbm, show_boxplots = FALSE) +
scale_y_continuous("Estimated value in Euro")+
ggtitle("Shapley values for Cristiano Ronaldo","")#ok
Select variable
selected_variables <- c("movement_reactions", "skill_ball_control",
"mentality_positioning", "attacking_finishing")
fifa_cp_gbm <- predict_profile(fifa_gbm_exp_deep,
new_observation = fifa["Cristiano Ronaldo",],
variables = selected_variables)
p1 <- plot(subtitle="Cristiano Ronaldo", fifa_cp_gbm, variables=selected_variables)#ok
p1
fifa_bd_gbm <- predict_parts(fifa_gbm_exp_deep,
new_observation = fifa["R. Lewandowski",],
type = "break_down")#ok
plot(fifa_bd_gbm) +
scale_y_continuous("Predicted value in Euro")+
ggtitle("Break-down plot for Robert Lewandowski","") #ok
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
Prediction
fifa_shap_gbm <- predict_parts(fifa_gbm_exp_deep,
new_observation = fifa["R. Lewandowski",],
type = "shap")
plot(fifa_shap_gbm, show_boxplots = FALSE) +
scale_y_continuous("Estimated value in Euro")+
ggtitle("Shapley values for Robert Lewandowski","")#ok
The predict_profile
selected_variables <- c("movement_reactions", "skill_ball_control", "mentality_positioning", "attacking_finishing")
fifa_cp_gbm <- predict_profile(fifa_gbm_exp_deep,
new_observation = fifa["R. Lewandowski",],
variables = selected_variables)
p2 <- plot(subtitle="R. Lewandowski",fifa_cp_gbm, variables = selected_variables)#ok
p2
plot all
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
pt <- grid.arrange(p1,p2)
pt
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]