# Course analysis: determine which groups are most influential and which variables are important.
# Read the project CSV from its hard-coded local path into `dg`.
dg <- read.csv(file = "d:/CANADA/Final_Project_359.csv")
# Model-performance summaries for the three explainers. The explainers are
# created elsewhere; judging by their names they presumably wrap a linear
# model, a random forest and an SVM -- confirm against their definitions.
mp_model_lm <- DALEX::model_performance(explainer = explainer_model_lm)
mp_model_rf <- DALEX::model_performance(explainer = explainer_model_rf)
mp_model_svm <- DALEX::model_performance(explainer = explainer_model_svm)

# Compare the three models on one chart with the plot method's default geom.
plot(mp_model_lm, mp_model_rf, mp_model_svm)

# Same comparison, drawn with the "boxplot" geom.
plot(
  mp_model_lm,
  mp_model_rf,
  mp_model_svm,
  geom = "boxplot"
)
# Fix the RNG seed so the permutation-based results that follow are repeatable.
set.seed(1)

# The three "_green" predictors whose importance is assessed.
vars <- c("CO_green", "age_green", "gender_green")

# Variable importance for the random-forest explainer using a single
# permutation round (B = 1), scored with the root-mean-square loss and
# restricted to the predictors listed in `vars`.
model_parts(
  explainer = explainer_model_rf,
  loss_function = loss_root_mean_square,
  B = 1,
  variables = vars
)
## variable mean_dropout_loss label
## 1 _full_model_ 0.8762281 randomForest
## 2 gender_green 0.9842917 randomForest
## 3 CO_green 1.1246204 randomForest
## 4 age_green 1.2893214 randomForest
## 5 _baseline_ 1.3938454 randomForest
# Variable importance for the random-forest explainer over 30 permutation
# rounds. No `variables` argument is given, so the set of predictors is left
# to model_parts' default. With type = "difference" the losses are reported
# relative to the full model, which is why `_full_model_` shows as 0 in the
# printed table.
vip.30 <- model_parts(
  explainer = explainer_model_rf,
  loss_function = loss_root_mean_square,
  B = 30,
  type = "difference"
)
# Show the resulting importance table.
vip.30
## variable mean_dropout_loss label
## 1 _full_model_ 0.0000000 randomForest
## 2 gender_green 0.1186020 randomForest
## 3 CO_green 0.2299802 randomForest
## 4 age_green 0.4353821 randomForest
## 5 _baseline_ 0.6180254 randomForest
library("ggplot2")
# Importance plot built from `vip.30`, with a custom title and an empty
# subtitle, then displayed.
p1 <- plot(vip.30) +
  ggtitle(
    label = "Green-Mean variable-importance over 30 permutations",
    subtitle = ""
  )
p1
### Preparation for Red Group
library("ggplot2")
# Importance plot built from `rvip.30` (not defined in this part of the
# script), with a custom title and an empty subtitle, then displayed.
p2 <- plot(rvip.30) +
  ggtitle(
    label = "Red-Mean variable-importance over 30 permutations",
    subtitle = ""
  )
p2
library("ggplot2")
# Importance plot built from `bvip.30` (not defined in this part of the
# script), with a custom title and an empty subtitle, then displayed.
p3 <- plot(bvip.30) +
  ggtitle(
    label = "Blue-Mean variable-importance over 30 permutations",
    subtitle = ""
  )
p3
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:randomForest':
##
## combine
## The following object is masked from 'package:dplyr':
##
## combine
# Arrange the three importance plots in one figure. grid.arrange() draws the
# figure and returns the layout table, which is kept in `ptot`.
ptot <- grid.arrange(p1, p2, p3)
# Print the layout table's textual summary.
ptot
## TableGrob (3 x 1) "arrange": 3 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]