Analisa data fifa, data berisi pemain bola internasional, pada study kali ini, akan diambil sample pemain Christiano Ronaldo dan Lewandowski. Analisa detail dua pemain tsb dapat diikuti dengan model statistic di bawah ini:

Preface

Process

head(fifa,2)
##                   nationality overall potential wage_eur value_eur age
## L. Messi            Argentina      94        94   565000  95500000  32
## Cristiano Ronaldo    Portugal      93        93   405000  58500000  34
##                   height_cm weight_kg attacking_crossing attacking_finishing
## L. Messi                170        72                 88                  95
## Cristiano Ronaldo       187        83                 84                  94
##                   attacking_heading_accuracy attacking_short_passing
## L. Messi                                  70                      92
## Cristiano Ronaldo                         89                      83
##                   attacking_volleys skill_dribbling skill_curve
## L. Messi                         88              97          93
## Cristiano Ronaldo                87              89          81
##                   skill_fk_accuracy skill_long_passing skill_ball_control
## L. Messi                         94                 92                 96
## Cristiano Ronaldo                76                 77                 92
##                   movement_acceleration movement_sprint_speed movement_agility
## L. Messi                             91                    84               93
## Cristiano Ronaldo                    89                    91               87
##                   movement_reactions movement_balance power_shot_power
## L. Messi                          95               95               86
## Cristiano Ronaldo                 96               71               95
##                   power_jumping power_stamina power_strength power_long_shots
## L. Messi                     68            75             68               94
## Cristiano Ronaldo            95            85             78               93
##                   mentality_aggression mentality_interceptions
## L. Messi                            48                      40
## Cristiano Ronaldo                   63                      29
##                   mentality_positioning mentality_vision mentality_penalties
## L. Messi                             94               94                  75
## Cristiano Ronaldo                    95               82                  85
##                   mentality_composure defending_marking
## L. Messi                           96                33
## Cristiano Ronaldo                  95                28
##                   defending_standing_tackle defending_sliding_tackle
## L. Messi                                 37                       26
## Cristiano Ronaldo                        32                       24
##                   goalkeeping_diving goalkeeping_handling goalkeeping_kicking
## L. Messi                           6                   11                  15
## Cristiano Ronaldo                  7                   11                  15
##                   goalkeeping_positioning goalkeeping_reflexes
## L. Messi                               14                    8
## Cristiano Ronaldo                      14                   11

Model assembly

fifa$LogValue <- log10(fifa$value_eur)
fifa_small <- fifa[,-c(1, 2, 3, 4, 6, 7)]

library(gbm)
## Loaded gbm 2.1.8
fifa_gbm_deep <- gbm(LogValue~., data = fifa_small, n.trees = 100,
        interaction.depth = 4, distribution = "gaussian")

fifa_gbm_exp_deep <- DALEX::explain(fifa_gbm_deep, 
        data = fifa_small, y = 10^fifa_small$LogValue, 
        predict_function = function(m,x) 10^predict(m, x, n.trees = 100),
        label = "GBM deep")
## Preparation of a new explainer is initiated
##   -> model label       :  GBM deep 
##   -> data              :  5000  rows  37  cols 
##   -> target variable   :  5000  values 
##   -> predict function  :  function(m, x) 10^predict(m, x, n.trees = 100) 
##   -> predicted values  :  numerical, min =  260647.3 , mean =  7467100 , max =  87783729  
##   -> model_info        :  package gbm , ver. 2.1.8 , task regression (  default  ) 
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -10746533 , mean =  6186.865 , max =  17716271  
##   A new explainer has been created! 

Model audit

model_performance(fifa_gbm_exp_deep)#ok
## Measures for:  regression
## mse        : 211013297609 
## rmse       : 459361.8 
## r2         : 0.9973163 
## mad        : 10713.33
## 
## Residuals:
##            0%           10%           20%           30%           40% 
## -1.074653e+07 -4.532869e+04 -1.622123e+04 -2.864477e+03  4.405487e+02 
##           50%           60%           70%           80%           90% 
##  8.658074e+02  4.941751e+03  9.663550e+03  2.242535e+04  4.710810e+04 
##          100% 
##  1.771627e+07
fifa_md_gbm_deep <- model_diagnostics(fifa_gbm_exp_deep)#ok

plot(fifa_md_gbm_deep, 
     variable = "y", yvariable = "y_hat") +
  scale_x_continuous("Value in Euro", trans = "identity") +

  scale_y_continuous("Predicted value in Euro", trans = "identity") +
 
  geom_abline(slope = 1) + 
  ggtitle("Predicted and observed player's values", "")#ok
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Observation Cristiano Ronaldo

fifa["Cristiano Ronaldo",]
##                   nationality overall potential wage_eur value_eur age
## Cristiano Ronaldo    Portugal      93        93   405000  58500000  34
##                   height_cm weight_kg attacking_crossing attacking_finishing
## Cristiano Ronaldo       187        83                 84                  94
##                   attacking_heading_accuracy attacking_short_passing
## Cristiano Ronaldo                         89                      83
##                   attacking_volleys skill_dribbling skill_curve
## Cristiano Ronaldo                87              89          81
##                   skill_fk_accuracy skill_long_passing skill_ball_control
## Cristiano Ronaldo                76                 77                 92
##                   movement_acceleration movement_sprint_speed movement_agility
## Cristiano Ronaldo                    89                    91               87
##                   movement_reactions movement_balance power_shot_power
## Cristiano Ronaldo                 96               71               95
##                   power_jumping power_stamina power_strength power_long_shots
## Cristiano Ronaldo            95            85             78               93
##                   mentality_aggression mentality_interceptions
## Cristiano Ronaldo                   63                      29
##                   mentality_positioning mentality_vision mentality_penalties
## Cristiano Ronaldo                    95               82                  85
##                   mentality_composure defending_marking
## Cristiano Ronaldo                  95                28
##                   defending_standing_tackle defending_sliding_tackle
## Cristiano Ronaldo                        32                       24
##                   goalkeeping_diving goalkeeping_handling goalkeeping_kicking
## Cristiano Ronaldo                  7                   11                  15
##                   goalkeeping_positioning goalkeeping_reflexes LogValue
## Cristiano Ronaldo                      14                   11 7.767156
rfifa_bd_gbm <- predict_parts(fifa_gbm_exp_deep, 
                   new_observation = fifa["Cristiano Ronaldo",],
                   type = "break_down")#ok

plot(rfifa_bd_gbm) +
  scale_y_continuous("Predicted value in Euro")+ 
  ggtitle("Break-down plot for Cristiano Ronaldo","") #ok
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

Prediction

rfifa_shap_gbm <- predict_parts(fifa_gbm_exp_deep, 
                     new_observation = fifa["Cristiano Ronaldo",], 
                     type = "shap")

plot(rfifa_shap_gbm, show_boxplots = FALSE) +
  scale_y_continuous("Estimated value in Euro")+ 
  ggtitle("Shapley values for Cristiano Ronaldo","")#ok

Select variable

selected_variables <- c("movement_reactions", "skill_ball_control", 
                        "mentality_positioning", "attacking_finishing")

fifa_cp_gbm <- predict_profile(fifa_gbm_exp_deep,
                   new_observation = fifa["Cristiano Ronaldo",], 
                         variables = selected_variables)
p1 <- plot(subtitle="Cristiano Ronaldo", fifa_cp_gbm, variables=selected_variables)#ok
p1

Observation R. Lewandowski

fifa_bd_gbm <- predict_parts(fifa_gbm_exp_deep, 
                   new_observation = fifa["R. Lewandowski",],
                   type = "break_down")#ok

plot(fifa_bd_gbm) +
  scale_y_continuous("Predicted value in Euro")+ 

  ggtitle("Break-down plot for Robert Lewandowski","") #ok
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

Prediction

fifa_shap_gbm <- predict_parts(fifa_gbm_exp_deep, 
                     new_observation = fifa["R. Lewandowski",], 
                     type = "shap")

plot(fifa_shap_gbm, show_boxplots = FALSE) +
  scale_y_continuous("Estimated value in Euro")+ 

  ggtitle("Shapley values for Robert Lewandowski","")#ok

The predict_profile

selected_variables <- c("movement_reactions", "skill_ball_control", "mentality_positioning", "attacking_finishing")
fifa_cp_gbm <- predict_profile(fifa_gbm_exp_deep,
                   new_observation = fifa["R. Lewandowski",], 
                         variables = selected_variables)
p2 <- plot(subtitle="R. Lewandowski",fifa_cp_gbm, variables = selected_variables)#ok
p2

plot all

Plot hasil analisa

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
pt <- grid.arrange(p1,p2)

pt
## TableGrob (2 x 1) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]