library("DALEX")
## Warning: package 'DALEX' was built under R version 4.1.2
## Welcome to DALEX (version: 2.3.0).
## Find examples and detailed introduction at: http://ema.drwhy.ai/
## Additional features will be available after installation of: ggpubr.
## Use 'install_dependencies()' to get all suggested dependencies
head(titanic_imputed)
##   gender age class    embarked  fare sibsp parch survived
## 1   male  42   3rd Southampton  7.11     0     0        0
## 2   male  13   3rd Southampton 20.05     0     2        0
## 3   male  16   3rd Southampton 20.05     1     1        0
## 4 female  39   3rd Southampton 20.05     1     1        1
## 5 female  16   3rd Southampton  7.13     0     0        1
## 6   male  25   3rd Southampton  7.13     0     0        1
library("ranger")
## Warning: package 'ranger' was built under R version 4.1.2
model_titanic_rf <- ranger(survived ~ gender + age + class + embarked +
                             fare + sibsp + parch,  data = titanic_imputed,
                           classification = TRUE)
model_titanic_rf
## Ranger result
## 
## Call:
##  ranger(survived ~ gender + age + class + embarked + fare + sibsp +      parch, data = titanic_imputed, classification = TRUE) 
## 
## Type:                             Classification 
## Number of trees:                  500 
## Sample size:                      2207 
## Number of independent variables:  7 
## Mtry:                             2 
## Target node size:                 1 
## Variable importance mode:         none 
## Splitrule:                        gini 
## OOB prediction error:             19.35 %
library("DALEX")
explain_titanic_rf <- explain(model_titanic_rf, 
                              data = titanic_imputed,
                              y = titanic_imputed$survived, 
                              label = "Random Forest v7",
                              colorize = FALSE)
## Preparation of a new explainer is initiated
##   -> model label       :  Random Forest v7 
##   -> data              :  2207  rows  8  cols 
##   -> target variable   :  2207  values 
##   -> predict function  :  yhat.ranger  will be used (  default  )
##   -> predicted values  :  No value for predict function target column. (  default  )
##   -> model_info        :  package ranger , ver. 0.13.1 , task classification (  default  ) 
##   -> predicted values  :  numerical, min =  0 , mean =  0.2115995 , max =  1  
##   -> residual function :  difference between y and yhat (  default  )
##   -> residuals         :  numerical, min =  -1 , mean =  0.1110104 , max =  1  
##   A new explainer has been created!
vi_rf <- model_parts(explain_titanic_rf)
head(vi_rf)
##       variable mean_dropout_loss            label
## 1 _full_model_         0.2131350 Random Forest v7
## 2     survived         0.2130589 Random Forest v7
## 3        parch         0.2298854 Random Forest v7
## 4        sibsp         0.2312393 Random Forest v7
## 5     embarked         0.2349043 Random Forest v7
## 6         fare         0.2594464 Random Forest v7
plot(vi_rf)

vr_age  <- model_profile(explain_titanic_rf, variables =  "age")
head(vr_age)
## $cp_profiles
## Top profiles    : 
##       gender        age class  embarked    fare sibsp parch survived _yhat_
## 168   female  0.1666667   3rd Cherbourg 15.0411     0     2        0      1
## 168.1 female  2.0000000   3rd Cherbourg 15.0411     0     2        0      1
## 168.2 female  4.0000000   3rd Cherbourg 15.0411     0     2        0      1
## 168.3 female  7.0000000   3rd Cherbourg 15.0411     0     2        0      1
## 168.4 female  9.0000000   3rd Cherbourg 15.0411     0     2        0      1
## 168.5 female 13.0000000   3rd Cherbourg 15.0411     0     2        0      1
##       _vname_ _ids_          _label_
## 168       age   168 Random Forest v7
## 168.1     age   168 Random Forest v7
## 168.2     age   168 Random Forest v7
## 168.3     age   168 Random Forest v7
## 168.4     age   168 Random Forest v7
## 168.5     age   168 Random Forest v7
## 
## 
## Top observations:
##      gender age            class    embarked    fare sibsp parch survived
## 168  female  40              3rd   Cherbourg 15.0411     0     2        0
## 543    male  24              2nd Southampton 73.1000     2     0        0
## 613    male  26              3rd Southampton  7.1506     0     0        1
## 1460   male  32 restaurant staff Southampton  0.0000     0     0        0
## 1359   male  40 victualling crew Southampton  0.0000     0     0        0
## 1554   male  38 engineering crew Southampton  0.0000     0     0        0
##      _yhat_          _label_ _ids_
## 168       0 Random Forest v7     1
## 543       0 Random Forest v7     2
## 613       0 Random Forest v7     3
## 1460      0 Random Forest v7     4
## 1359      0 Random Forest v7     5
## 1554      0 Random Forest v7     6
## 
## $agr_profiles
## Top profiles    : 
##   _vname_          _label_        _x_ _yhat_ _ids_
## 1     age Random Forest v7  0.1666667   0.56     0
## 2     age Random Forest v7  2.0000000   0.59     0
## 3     age Random Forest v7  4.0000000   0.60     0
## 4     age Random Forest v7  7.0000000   0.54     0
## 5     age Random Forest v7  9.0000000   0.49     0
## 6     age Random Forest v7 13.0000000   0.29     0
## 
## $color
## [1] "#4378bf"
plot(vr_age)

vr_class  <- model_profile(explain_titanic_rf, variables =  "class")
## 'variable_type' changed to 'categorical' due to lack of numerical variables.
plot(vr_class)

new_passanger <- data.frame(
  class = factor("1st", levels = c("1st", "2nd", "3rd", "deck crew", "engineering crew", "restaurant staff", "victualling crew")),
  gender = factor("male", levels = c("female", "male")),
  age = 8,
  sibsp = 0,
  parch = 0,
  fare = 72,
  embarked = factor("Southampton", levels = c("Belfast", "Cherbourg", "Queenstown", "Southampton"))
)

sp_rf <- predict_parts(explain_titanic_rf, new_passanger)
plot(sp_rf)

shap_henry <- predict_parts( explain_titanic_rf, 
                            new_passanger, 
                            type = "shap")
plot(shap_henry)

plot(shap_henry, show_boxplots = FALSE)