library(randomForestExplainer)
library(randomForest)
library(tidyverse)
# Explains a random forest in an HTML document using plots created by
# randomForestExplainer.
# install.packages("randomForestExplainer")

# Fit a forest that predicts Species from every other column of iris.
# NOTE(review): localImp = TRUE is presumably what makes the accuracy-based
# importance measures available to the report -- confirm against the
# randomForest documentation.
forest <- randomForest::randomForest(Species ~ ., data = iris, localImp = TRUE)

# explain
# Build the report while silencing messages and warnings raised along the way.
# suppressMessages() already muffles package start-up messages (they inherit
# from "message"), so no separate suppressPackageStartupMessages() wrapper is
# needed.
suppressMessages(
  suppressWarnings(
    explain_forest(forest, interactions = TRUE)
  )
)
# |
# | | 0%
# |
# |... | 5%
# inline R code fragments
# |
# |...... | 10%
# label: setup (with options)
# List of 1
# $ include: logi FALSE
# |
# |.......... | 15%
# ordinary text without R code
# |
# |............. | 20%
# label: unnamed-chunk-3
# |
# |................ | 25%
# ordinary text without R code
# |
# |.................... | 30%
# label: unnamed-chunk-4
# |
# |....................... | 35%
# ordinary text without R code
# |
# |.......................... | 40%
# label: unnamed-chunk-5
# |
# |............................. | 45%
# ordinary text without R code
# |
# |................................ | 50%
# label: unnamed-chunk-6
# |
# |.................................... | 55%
# ordinary text without R code
# |
# |....................................... | 60%
# label: unnamed-chunk-7
# |
# |.......................................... | 65%
# inline R code fragments
# [1] accuracy_decrease and gini_decrease
# |
# |.............................................. | 70%
# label: unnamed-chunk-8
# |
# |................................................. | 75%
# inline R code fragments
# [1] mean_min_depth, accuracy_decrease, gini_decrease, no_of_nodes, times_a_root
# |
# |.................................................... | 80%
# label: unnamed-chunk-9 (with options)
# List of 2
# $ fig.width : num 10
# $ fig.height: num 9
# |
# |....................................................... | 85%
# ordinary text without R code
# |
# |.......................................................... | 90%
# label: unnamed-chunk-10 (with options)
# List of 2
# $ fig.width : num 10
# $ fig.height: num 9
# |
# |.............................................................. | 95%
# ordinary text without R code
# |
# |.................................................................| 100%
# label: conditional_print (with options)
# List of 2
# $ child: chr "Explain_forest_template_interactions.Rmd"
# $ eval : symbol interactions
# |
# | | 0%
# |
# |......... | 14%
# inline R code fragments
# [1] Petal.Length, Petal.Width, Sepal.Length, Sepal.Width
# |
# |................... | 29%
# label: unnamed-chunk-11
# |
# |............................ | 43%
# ordinary text without R code
# |
# |..................................... | 57%
# label: unnamed-chunk-12
# |
# |.............................................. | 71%
# inline R code fragments
# |
# |........................................................ | 86%
# label: unnamed-chunk-13
# |
# |.................................................................| 100%
# ordinary text without R code
# /Applications/RStudio.app/Contents/MacOS/pandoc/pandoc +RTS -K512m -RTS Explain_forest_template.utf8.md --to html --from markdown+autolink_bare_uris+ascii_identifiers+tex_math_single_backslash --output /Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/DataMiningscience/Your_forest_explained.html --smart --email-obfuscation none --self-contained --standalone --section-divs --table-of-contents --toc-depth 3 --variable toc_float=1 --variable toc_selectors=h1,h2,h3 --variable toc_collapsed=1 --variable toc_smooth_scroll=1 --variable toc_print=1 --template /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --variable 'theme:bootstrap' --include-in-header /var/folders/mj/w1gxzjcd0qx2cw_0690z7y640000gn/T//Rtmp9LrmXm/rmarkdown-str351f599a15d3.html --mathjax --variable 'mathjax-url:https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'
# suppressWarnings()

# Get the names of the k variables with the highest sum of rankings based on
# the specified importance measures.
important_variables(measure_importance(forest), k = 3)
#> [1] "Petal.Length" "Petal.Width" "Sepal.Length"
# Importance of variables in a random forest.
measure_importance(forest, mean_sample = "top_trees", measures = NULL)
# mean_sample takes one of the values "all_trees", "top_trees",
# "relevant_trees".
measure_importance(forest)

# Calculate the minimal depth distribution of a random forest; only the first
# rows are shown.
min_depth_distribution(forest) %>%
  head()
# Calculate the mean conditional minimal depth with respect to a vector of
# variables.
# uncond_mean_sample is spelled out with the same value as mean_sample: no
# `mean_sample` object exists at top level, so referring to it by name (as the
# function signature's default does) would fail with "object not found".
min_depth_interactions(
  forest,
  vars = important_variables(measure_importance(forest)),
  mean_sample = "top_trees",
  uncond_mean_sample = "top_trees"
)

# Smaller forest, conditioning only on the two petal variables; only the first
# rows of the result are shown.
forest <- randomForest::randomForest(Species ~ ., data = iris, ntree = 100)
min_depth_interactions(forest, c("Petal.Width", "Petal.Length")) %>%
  head()
# Plot importance measures with ggpairs.
# Plot selected measures of importance of variables in a forest using ggpairs.

# The forest is fitted first so that the importance frame used below exists.
forest <- randomForest::randomForest(
  Species ~ .,
  data = iris,
  localImp = TRUE,
  ntree = 200
)
importance_frame <- measure_importance(forest)

# The measures are named explicitly rather than picked by column position
# (formerly names(importance_frame)[c(2, 4, 5, 3, 7)]), so the selection does
# not depend on the column order of the importance frame.
plot_importance_ggpairs(
  importance_frame,
  measures = c(
    "mean_min_depth", "accuracy_decrease", "gini_decrease",
    "no_of_nodes", "times_a_root"
  ),
  main = "Relations between measures of importance"
)

# Same plot restricted to two measures.
frame <- measure_importance(
  forest,
  measures = c("mean_min_depth", "times_a_root")
)
plot_importance_ggpairs(frame, measures = c("mean_min_depth", "times_a_root"))
# Plot importance measures rankings with ggpairs.
# Plot against each other the rankings of variables according to various
# measures of importance.

# The forest is fitted first so that the importance frame used below exists.
forest <- randomForest::randomForest(
  Species ~ .,
  data = iris,
  localImp = TRUE,
  ntree = 300
)
importance_frame <- measure_importance(forest)

# The measures are named explicitly rather than picked by column position
# (formerly names(importance_frame)[c(2, 4, 5, 3, 7)]).
plot_importance_rankings(
  importance_frame,
  measures = c(
    "mean_min_depth", "accuracy_decrease", "gini_decrease",
    "no_of_nodes", "times_a_root"
  ),
  main = "Relations between rankings according to different measures"
)

# Same plot restricted to two measures.
# NOTE(review): this call previously used plot_importance_ggpairs(), which
# duplicated the plot of the preceding section; switched to the rankings plot
# to match this section's purpose -- confirm that was the intent.
frame <- measure_importance(
  forest,
  measures = c("mean_min_depth", "times_a_root")
)
plot_importance_rankings(frame, measures = c("mean_min_depth", "times_a_root"))
# Plot the distribution of minimal depth in a random forest.

# Fit the forest and compute its minimal depth distribution once; both plots
# below read from this frame.
forest <- randomForest::randomForest(Species ~ ., data = iris, ntree = 300)
min_depth_frame <- min_depth_distribution(forest)

# Call with every plotting option spelled out.
plot_min_depth_distribution(
  min_depth_frame,
  k = 10,
  min_no_of_trees = 0,
  mean_sample = "top_trees",
  mean_scale = FALSE,
  mean_round = 2,
  main = "Distribution of minimal depth and its mean"
)

# Call relying on the function's default options.
plot_min_depth_distribution(min_depth_frame)
# Plot the top mean conditional minimal depth.

# Fit the forest and compute the interactions frame once; both plots below
# read from it.
forest <- randomForest::randomForest(Species ~ ., data = iris, ntree = 100)
interactions_frame <- min_depth_interactions(
  forest,
  c("Petal.Width", "Petal.Length")
)

# Number of interactions to show. It is held in a variable because the title
# below mentions it, and no `k` object exists at top level for the title
# expression to refer to.
top_k <- 30
plot_min_depth_interactions(
  interactions_frame,
  k = top_k,
  main = paste0(
    "Mean minimal depth for ",
    paste0(top_k, " most frequent interactions")
  )
)

# Call relying on the function's default options.
plot_min_depth_interactions(interactions_frame)
# Multi-way importance plot.
# Plot two or three measures of importance of variables in a random forest.
# Choose importance measures from colnames(importance_frame).
forest <- randomForest::randomForest(Species ~ ., data = iris, localImp = TRUE)
plot_multi_way_importance(measure_importance(forest))
# Plot the prediction of the forest for a grid of values of two numerical
# variables.
forest <- randomForest::randomForest(Species ~ ., data = iris)
plot_predict_interaction(forest, iris, "Petal.Width", "Sepal.Width")
# NOTE(review): the bare `NA` that followed looks like pasted console output;
# kept here as a comment -- confirm it can be removed.
#> NA