library("DALEX")
## Warning: package 'DALEX' was built under R version 4.1.2
## Welcome to DALEX (version: 2.3.0).
## Find examples and detailed introduction at: http://ema.drwhy.ai/
## Additional features will be available after installation of: ggpubr.
## Use 'install_dependencies()' to get all suggested dependencies
head(titanic_imputed)
## gender age class embarked fare sibsp parch survived
## 1 male 42 3rd Southampton 7.11 0 0 0
## 2 male 13 3rd Southampton 20.05 0 2 0
## 3 male 16 3rd Southampton 20.05 1 1 0
## 4 female 39 3rd Southampton 20.05 1 1 1
## 5 female 16 3rd Southampton 7.13 0 0 1
## 6 male 25 3rd Southampton 7.13 0 0 1
library("ranger")
## Warning: package 'ranger' was built under R version 4.1.2
model_titanic_rf <- ranger(survived ~ gender + age + class + embarked +
fare + sibsp + parch, data = titanic_imputed,
classification = TRUE)
model_titanic_rf
## Ranger result
##
## Call:
## ranger(survived ~ gender + age + class + embarked + fare + sibsp + parch, data = titanic_imputed, classification = TRUE)
##
## Type: Classification
## Number of trees: 500
## Sample size: 2207
## Number of independent variables: 7
## Mtry: 2
## Target node size: 1
## Variable importance mode: none
## Splitrule: gini
## OOB prediction error: 19.35 %
library("DALEX")
explain_titanic_rf <- explain(model_titanic_rf,
data = titanic_imputed,
y = titanic_imputed$survived,
label = "Random Forest v7",
colorize = FALSE)
## Preparation of a new explainer is initiated
## -> model label : Random Forest v7
## -> data : 2207 rows 8 cols
## -> target variable : 2207 values
## -> predict function : yhat.ranger will be used ( default )
## -> predicted values : No value for predict function target column. ( default )
## -> model_info : package ranger , ver. 0.13.1 , task classification ( default )
## -> predicted values : numerical, min = 0 , mean = 0.2115995 , max = 1
## -> residual function : difference between y and yhat ( default )
## -> residuals : numerical, min = -1 , mean = 0.1110104 , max = 1
## A new explainer has been created!
vi_rf <- model_parts(explain_titanic_rf)
head(vi_rf)
## variable mean_dropout_loss label
## 1 _full_model_ 0.2131350 Random Forest v7
## 2 survived 0.2130589 Random Forest v7
## 3 parch 0.2298854 Random Forest v7
## 4 sibsp 0.2312393 Random Forest v7
## 5 embarked 0.2349043 Random Forest v7
## 6 fare 0.2594464 Random Forest v7
plot(vi_rf)

vr_age <- model_profile(explain_titanic_rf, variables = "age")
head(vr_age)
## $cp_profiles
## Top profiles :
## gender age class embarked fare sibsp parch survived _yhat_
## 168 female 0.1666667 3rd Cherbourg 15.0411 0 2 0 1
## 168.1 female 2.0000000 3rd Cherbourg 15.0411 0 2 0 1
## 168.2 female 4.0000000 3rd Cherbourg 15.0411 0 2 0 1
## 168.3 female 7.0000000 3rd Cherbourg 15.0411 0 2 0 1
## 168.4 female 9.0000000 3rd Cherbourg 15.0411 0 2 0 1
## 168.5 female 13.0000000 3rd Cherbourg 15.0411 0 2 0 1
## _vname_ _ids_ _label_
## 168 age 168 Random Forest v7
## 168.1 age 168 Random Forest v7
## 168.2 age 168 Random Forest v7
## 168.3 age 168 Random Forest v7
## 168.4 age 168 Random Forest v7
## 168.5 age 168 Random Forest v7
##
##
## Top observations:
## gender age class embarked fare sibsp parch survived
## 168 female 40 3rd Cherbourg 15.0411 0 2 0
## 543 male 24 2nd Southampton 73.1000 2 0 0
## 613 male 26 3rd Southampton 7.1506 0 0 1
## 1460 male 32 restaurant staff Southampton 0.0000 0 0 0
## 1359 male 40 victualling crew Southampton 0.0000 0 0 0
## 1554 male 38 engineering crew Southampton 0.0000 0 0 0
## _yhat_ _label_ _ids_
## 168 0 Random Forest v7 1
## 543 0 Random Forest v7 2
## 613 0 Random Forest v7 3
## 1460 0 Random Forest v7 4
## 1359 0 Random Forest v7 5
## 1554 0 Random Forest v7 6
##
## $agr_profiles
## Top profiles :
## _vname_ _label_ _x_ _yhat_ _ids_
## 1 age Random Forest v7 0.1666667 0.56 0
## 2 age Random Forest v7 2.0000000 0.59 0
## 3 age Random Forest v7 4.0000000 0.60 0
## 4 age Random Forest v7 7.0000000 0.54 0
## 5 age Random Forest v7 9.0000000 0.49 0
## 6 age Random Forest v7 13.0000000 0.29 0
##
## $color
## [1] "#4378bf"
plot(vr_age)

vr_class <- model_profile(explain_titanic_rf, variables = "class")
## 'variable_type' changed to 'categorical' due to lack of numerical variables.
plot(vr_class)

new_passanger <- data.frame(
class = factor("1st", levels = c("1st", "2nd", "3rd", "deck crew", "engineering crew", "restaurant staff", "victualling crew")),
gender = factor("male", levels = c("female", "male")),
age = 8,
sibsp = 0,
parch = 0,
fare = 72,
embarked = factor("Southampton", levels = c("Belfast", "Cherbourg", "Queenstown", "Southampton"))
)
sp_rf <- predict_parts(explain_titanic_rf, new_passanger)
plot(sp_rf)

shap_henry <- predict_parts( explain_titanic_rf,
new_passanger,
type = "shap")
plot(shap_henry)

plot(shap_henry, show_boxplots = FALSE)
