dime package : Deep Interactive Model Explanations
- The dime package is still in the experimental phase.
- With the dime package you can combine any number of interactive widgets into a single dashboard. You can connect local, global explanations or EDA tools like histograms or barplots
# message=FALSE
# devtools::install_github("ModelOriented/dime")
# devtools::install_github("ModelOriented/ingredients")
# devtools::install_github("ModelOriented/iBreakDown")
# install.packages("DALEX")
library("dime")
library("DALEX")
library("ingredients")
library("iBreakDown")
# eval=TRUE, echo=TRUE
titanic <- na.omit(titanic)
str(titanic)
## 'data.frame': 2099 obs. of 9 variables:
## $ gender : Factor w/ 2 levels "female","male": 2 2 2 1 1 2 2 1 2 2 ...
## $ age : num 42 13 16 39 16 25 30 28 27 20 ...
## $ class : Factor w/ 7 levels "1st","2nd","3rd",..: 3 3 3 3 3 3 2 2 3 3 ...
## $ embarked: Factor w/ 4 levels "Belfast","Cherbourg",..: 4 4 4 4 4 4 2 2 2 4 ...
## $ country : Factor w/ 48 levels "Argentina","Australia",..: 44 44 44 15 30 44 17 17 26 16 ...
## $ fare : num 7.11 20.05 20.05 20.05 7.13 ...
## $ sibsp : num 0 0 1 1 0 0 1 1 0 0 ...
## $ parch : num 0 2 1 1 0 0 0 0 0 0 ...
## $ survived: Factor w/ 2 levels "no","yes": 1 1 1 2 2 2 1 2 2 2 ...
## - attr(*, "na.action")= 'omit' Named int 46 90 118 119 122 132 139 145 151 152 ...
## ..- attr(*, "names")= chr "46" "90" "118" "119" ...
head(titanic)
## gender age class embarked country fare sibsp parch survived
## 1 male 42 3rd Southampton United States 7.11 0 0 no
## 2 male 13 3rd Southampton United States 20.05 0 2 no
## 3 male 16 3rd Southampton United States 20.05 1 1 no
## 4 female 39 3rd Southampton England 20.05 1 1 yes
## 5 female 16 3rd Southampton Norway 7.13 0 0 yes
## 6 male 25 3rd Southampton United States 7.13 0 0 yes
# eval=TRUE, echo=TRUE
set.seed(1313)
titanic_small <- titanic[sample(1:nrow(titanic), 500), c(1,2,3,6,7,9)]
str(titanic_small)
## 'data.frame': 500 obs. of 6 variables:
## $ gender : Factor w/ 2 levels "female","male": 1 2 2 2 2 2 2 2 2 1 ...
## $ age : num 36 21 33 56 30 28 44 37 20 35 ...
## $ class : Factor w/ 7 levels "1st","2nd","3rd",..: 7 3 7 1 7 7 1 2 3 3 ...
## $ fare : num 0 7.17 0 26.11 0 ...
## $ sibsp : num 0 0 0 0 0 0 0 1 0 0 ...
## $ survived: Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 2 ...
# eval=TRUE, echo=TRUE
model_titanic_glm <- glm(survived == "yes" ~ gender + age + fare + class + sibsp,
data = titanic_small, family = "binomial")
summary(model_titanic_glm) # survived == "no" 일때는 베타값의 부호만 바뀜
##
## Call:
## glm(formula = survived == "yes" ~ gender + age + fare + class +
## sibsp, family = "binomial", data = titanic_small)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8060 -0.7000 -0.4697 0.5691 2.3537
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.650986 0.697730 5.233 1.67e-07 ***
## gendermale -2.845227 0.330574 -8.607 < 2e-16 ***
## age -0.037618 0.010743 -3.502 0.000463 ***
## fare 0.002362 0.003536 0.668 0.504176
## class2nd -1.319498 0.498139 -2.649 0.008076 **
## class3rd -2.181320 0.494408 -4.412 1.02e-05 ***
## classdeck crew 0.654111 0.681273 0.960 0.336990
## classengineering crew -0.860380 0.506977 -1.697 0.089682 .
## classrestaurant staff -3.087216 1.223061 -2.524 0.011597 *
## classvictualling crew -0.921083 0.472174 -1.951 0.051090 .
## sibsp -0.015643 0.206190 -0.076 0.939525
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 617.49 on 499 degrees of freedom
## Residual deviance: 462.51 on 489 degrees of freedom
## AIC: 484.51
##
## Number of Fisher Scoring iterations: 5
# eval=TRUE, echo=TRUE
explain_titanic_glm <- explain(model_titanic_glm,
data = titanic_small[,-6],
y = titanic_small$survived == "yes",
label = "glm")
new_observations <- titanic_small[1:4,-6]
rownames(new_observations) <- c("Lisa", "James", "Thomas", "Nancy")
# It’s very easy to generate such website. Just create an explainer and call the modelStudio() function.
modelStudio(explain_titanic_glm,
new_observations,
facet_dim = c(2,2), N = 200, B = 20, time = 0)
##
|
| | 0%
|
|======= | 11%
|
|============== | 22%
|
|====================== | 33%
|
|============================= | 44%
|
|==================================== | 56%
|
|=========================================== | 67%
|
|=================================================== | 78%
|
|========================================================== | 89%
|
|=================================================================| 100%
***