dime package : Deep Interactive Model Explanations

# message=FALSE

# devtools::install_github("ModelOriented/dime")
# devtools::install_github("ModelOriented/ingredients")
# devtools::install_github("ModelOriented/iBreakDown")
# install.packages("DALEX")

library("dime")
library("DALEX")
library("ingredients")
library("iBreakDown")

# eval=TRUE, echo=TRUE

titanic <- na.omit(titanic)
str(titanic)
## 'data.frame':    2099 obs. of  9 variables:
##  $ gender  : Factor w/ 2 levels "female","male": 2 2 2 1 1 2 2 1 2 2 ...
##  $ age     : num  42 13 16 39 16 25 30 28 27 20 ...
##  $ class   : Factor w/ 7 levels "1st","2nd","3rd",..: 3 3 3 3 3 3 2 2 3 3 ...
##  $ embarked: Factor w/ 4 levels "Belfast","Cherbourg",..: 4 4 4 4 4 4 2 2 2 4 ...
##  $ country : Factor w/ 48 levels "Argentina","Australia",..: 44 44 44 15 30 44 17 17 26 16 ...
##  $ fare    : num  7.11 20.05 20.05 20.05 7.13 ...
##  $ sibsp   : num  0 0 1 1 0 0 1 1 0 0 ...
##  $ parch   : num  0 2 1 1 0 0 0 0 0 0 ...
##  $ survived: Factor w/ 2 levels "no","yes": 1 1 1 2 2 2 1 2 2 2 ...
##  - attr(*, "na.action")= 'omit' Named int  46 90 118 119 122 132 139 145 151 152 ...
##   ..- attr(*, "names")= chr  "46" "90" "118" "119" ...
head(titanic)
##   gender age class    embarked       country  fare sibsp parch survived
## 1   male  42   3rd Southampton United States  7.11     0     0       no
## 2   male  13   3rd Southampton United States 20.05     0     2       no
## 3   male  16   3rd Southampton United States 20.05     1     1       no
## 4 female  39   3rd Southampton       England 20.05     1     1      yes
## 5 female  16   3rd Southampton        Norway  7.13     0     0      yes
## 6   male  25   3rd Southampton United States  7.13     0     0      yes

# eval=TRUE, echo=TRUE

set.seed(1313)
titanic_small <- titanic[sample(1:nrow(titanic), 500), c(1,2,3,6,7,9)]
str(titanic_small)
## 'data.frame':    500 obs. of  6 variables:
##  $ gender  : Factor w/ 2 levels "female","male": 1 2 2 2 2 2 2 2 2 1 ...
##  $ age     : num  36 21 33 56 30 28 44 37 20 35 ...
##  $ class   : Factor w/ 7 levels "1st","2nd","3rd",..: 7 3 7 1 7 7 1 2 3 3 ...
##  $ fare    : num  0 7.17 0 26.11 0 ...
##  $ sibsp   : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ survived: Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 2 ...

# eval=TRUE, echo=TRUE

model_titanic_glm <- glm(survived == "yes" ~ gender + age + fare + class + sibsp,
                         data = titanic_small, family = "binomial")    
summary(model_titanic_glm)    # survived == "no" 일때는 베타값의 부호만 바뀜
## 
## Call:
## glm(formula = survived == "yes" ~ gender + age + fare + class + 
##     sibsp, family = "binomial", data = titanic_small)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8060  -0.7000  -0.4697   0.5691   2.3537  
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            3.650986   0.697730   5.233 1.67e-07 ***
## gendermale            -2.845227   0.330574  -8.607  < 2e-16 ***
## age                   -0.037618   0.010743  -3.502 0.000463 ***
## fare                   0.002362   0.003536   0.668 0.504176    
## class2nd              -1.319498   0.498139  -2.649 0.008076 ** 
## class3rd              -2.181320   0.494408  -4.412 1.02e-05 ***
## classdeck crew         0.654111   0.681273   0.960 0.336990    
## classengineering crew -0.860380   0.506977  -1.697 0.089682 .  
## classrestaurant staff -3.087216   1.223061  -2.524 0.011597 *  
## classvictualling crew -0.921083   0.472174  -1.951 0.051090 .  
## sibsp                 -0.015643   0.206190  -0.076 0.939525    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 617.49  on 499  degrees of freedom
## Residual deviance: 462.51  on 489  degrees of freedom
## AIC: 484.51
## 
## Number of Fisher Scoring iterations: 5

# eval=TRUE, echo=TRUE

explain_titanic_glm <- explain(model_titanic_glm,
                               data = titanic_small[,-6],
                               y = titanic_small$survived == "yes",
                               label = "glm")

new_observations <- titanic_small[1:4,-6]
rownames(new_observations) <- c("Lisa", "James", "Thomas", "Nancy")

# It’s very easy to generate such website. Just create an explainer and call the modelStudio() function.
modelStudio(explain_titanic_glm,
            new_observations,
            facet_dim = c(2,2), N = 200, B = 20, time = 0)
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |===================================================              |  78%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |=================================================================| 100%

***