Course analysis: determine which groups are most influential and which variables are most important.

Data preparation

Load libraries

Read data

Preparation for Green group

# Read the project CSV into `dg` (path is machine-specific).
dg <- read.csv(file = "d:/CANADA/Final_Project_359.csv")

Create Model(hide)

Model explainer(hide)

Outliers

# Build one DALEX model-performance object per explainer (lm, rf, svm);
# the explainers themselves are created in a hidden chunk.
mp_model_lm <- DALEX::model_performance(explainer = explainer_model_lm)
mp_model_rf <- DALEX::model_performance(explainer = explainer_model_rf)
mp_model_svm <- DALEX::model_performance(explainer = explainer_model_svm)

Plot Model

# Compare the three performance objects on one chart (default geom).
plot(mp_model_lm, mp_model_rf, mp_model_svm) # ok

# Same comparison, drawn as boxplots.
plot(mp_model_lm, mp_model_rf, mp_model_svm, geom = "boxplot")

Breakdown Variable

# Fix the RNG state before the permutation-based importance run.
set.seed(1)

# Features of interest: CO_green, age_green, gender_green.
vars <- c("CO_green", "age_green", "gender_green")

# Importance of the selected features for the rf explainer, using a single
# permutation (B = 1) and the root-mean-square loss; the result auto-prints.
model_parts(
  explainer = explainer_model_rf,
  loss_function = loss_root_mean_square,
  B = 1,
  variables = vars
)
##       variable mean_dropout_loss        label
## 1 _full_model_         0.8762281 randomForest
## 2 gender_green         0.9842917 randomForest
## 3     CO_green         1.1246204 randomForest
## 4    age_green         1.2893214 randomForest
## 5   _baseline_         1.3938454 randomForest
 # Importance over 30 permutations for the rf explainer, reported as the
 # difference from the full-model loss (type = "difference").
 vip.30 <- model_parts(
   explainer = explainer_model_rf,
   loss_function = loss_root_mean_square,
   B = 30,
   type = "difference"
 )
 # Print the stored result.
 vip.30
##       variable mean_dropout_loss        label
## 1 _full_model_         0.0000000 randomForest
## 2 gender_green         0.1186020 randomForest
## 3     CO_green         0.2299802 randomForest
## 4    age_green         0.4353821 randomForest
## 5   _baseline_         0.6180254 randomForest

Plot

library(ggplot2)

# Green group: plot the 30-permutation importance result with a title and
# an empty subtitle, then display it.
p1 <- plot(vip.30) +
  ggtitle(
    label = "Green-Mean variable-importance over 30 permutations",
    subtitle = ""
  )
p1

### Preparation for Red Group

Outliers

Breakdown

library(ggplot2)

# Red group: plot the importance result stored in `rvip.30` (created in a
# hidden chunk) with a title and an empty subtitle, then display it.
p2 <- plot(rvip.30) +
  ggtitle(
    label = "Red-Mean variable-importance over 30 permutations",
    subtitle = ""
  )
p2

Preparation for Blue group

Outliers

Breakdown

library(ggplot2)

# Blue group: plot the importance result stored in `bvip.30` (created in a
# hidden chunk) with a title and an empty subtitle, then display it.
p3 <- plot(bvip.30) +
  ggtitle(
    label = "Blue-Mean variable-importance over 30 permutations",
    subtitle = ""
  )
p3

Plot all analysis

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following object is masked from 'package:dplyr':
## 
##     combine
# Draw the Green, Red and Blue importance plots together and keep the
# returned layout object.
ptot <- gridExtra::grid.arrange(p1, p2, p3) # ok

# Print the layout object's summary.
ptot
## TableGrob (3 x 1) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]