1 ranger, vivid

## https://cran.r-project.org/web/packages/vivid/vivid.pdf
library(vivid)

library(ranger)
dim(airquality)
## [1] 153   6
aq <- na.omit(airquality)
dim(aq)
## [1] 111   6
head(aq)
##   Ozone Solar.R Wind Temp Month Day
## 1    41     190  7.4   67     5   1
## 2    36     118  8.0   72     5   2
## 3    12     149 12.6   74     5   3
## 4    18     313 11.5   62     5   4
## 7    23     299  8.6   65     5   7
## 8    19      99 13.8   59     5   8
aq <- aq[1:20,]# for speed
rF <- ranger(Ozone ~ ., data = aq, importance = "permutation")
myMat <- vivi(fit = rF, data = aq, response = "Ozone")
myDf <- as.data.frame(myMat)
dim(myDf)
## [1] 25  6
head(myDf)
##   Variable_1 Variable_2     Value Measure Row Col
## 1    Solar.R    Solar.R 2.8394360    Vimp   1   1
## 2        Day    Solar.R 0.5026187    Vint   2   1
## 3       Wind    Solar.R 0.2188328    Vint   3   1
## 4       Temp    Solar.R 0.2685547    Vint   4   1
## 5      Month    Solar.R 0.0000000    Vint   5   1
## 6    Solar.R        Day 0.5026187    Vint   1   2
# Load in the data:
aq <- na.omit(airquality)
f <- lm(Ozone ~ ., data = aq)
pdpPairs(aq, f, "Ozone")

# Run a ranger model:
library(ranger)
library(MASS)
data(Boston)
head(Boston)
##      crim zn indus chas   nox    rm  age    dis rad tax ptratio  black lstat
## 1 0.00632 18  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3 396.90  4.98
## 2 0.02731  0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8 396.90  9.14
## 3 0.02729  0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8 392.83  4.03
## 4 0.03237  0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7 394.63  2.94
## 5 0.06905  0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7 396.90  5.33
## 6 0.02985  0  2.18    0 0.458 6.430 58.7 6.0622   3 222    18.7 394.12  5.21
##   medv
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7
Boston1 <- Boston[, c(4:6, 8, 13:14)]
Boston1$chas <- factor(Boston1$chas)
fit <- ranger(medv ~ ., data = Boston1, importance = "permutation")
pdpPairs(Boston1[1:30, ], fit, "medv")

pdpPairs(Boston1[1:30, ], fit, "medv", comboImage = TRUE)

viv <- vivi(Boston1, fit, "medv")
# show top variables only
pdpPairs(Boston1[1:30, ], fit, "medv", comboImage = TRUE, vars = rownames(viv)[1:4])

library(ranger)
rf <- ranger(Species ~ ., data = iris, probability = TRUE)

pdpPairs(iris, rf, "Species") # prediction probs for first class, setosa

pdpPairs(iris, rf, "Species", class = "versicolor") # prediction probs versicolor

# Load in the data:
aq <- na.omit(airquality)
fit <- lm(Ozone ~ ., data = aq)
pdpVars(aq, fit, "Ozone")

# Classification
library(ranger)
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
rfClassif <- ranger(Species ~ ., data = iris, probability = TRUE)
pdpVars(iris, rfClassif, "Species", class = 3)

pp <- pdpVars(iris, rfClassif, "Species", class = 2, draw = FALSE)
pp[[1]]

pdpVars(iris, rfClassif, "Species", class = 2, colorVar = "Species")

## Not run:
library(MASS)
library(ranger)
Boston1 <- Boston
Boston1$chas <- factor(Boston1$chas)
rf <- ranger(medv ~ ., data = Boston1)
pdpZen(Boston1[1:30, ], rf, response = "medv", zpath = names(Boston1)[1:4], comboImage = T) # Find the top variables in rf

set.seed(123)
viv <- vivi(Boston1, rf, "medv", nmax = 30) # use 30 rows, for speed
pdpZen(Boston1, rf, response = "medv", zpath = rownames(viv)[1:4], comboImage = T)

zpath <- zPath(viv, cutoff = .2) # find plots whose interaction score exceeds .2
pdpZen(Boston1, rf, response = "medv", zpath = zpath, comboImage = T)

2 vip, vivid

library(ranger)
library(vip)
aq <- na.omit(airquality) # get data
nameAq <- names(aq[-1]) # get feature names
rF <- ranger(Ozone ~ ., data = aq, importance = "permutation") # create ranger random forest fit 
vImp <- vi(rF) # vip importance
vInt <- vint(rF, feature_names = nameAq) # vip interaction

aq <- na.omit(airquality)
f <- lm(Ozone ~ ., data = aq)
m <- vivi(fit = f, data = aq, response = "Ozone") # as expected all interactions are zero viviHeatmap(m)




aq <- na.omit(airquality)
f <- lm(Ozone ~ ., data = aq)
m <- vivi(fit = f, data = aq, response = "Ozone") # as expected all interactions are zero 
viviHeatmap(m)

3 randomForest, vivid

# Select importance metric
library(randomForest)
rf1 <- randomForest(Ozone~., data = aq, importance = TRUE)
m2 <- vivi(fit = rf1, data = aq, response = 'Ozone',
           importanceType = '%IncMSE') # select %IncMSE as the importance measure
viviHeatmap(m2)

library(ranger)
rf <- ranger(Species ~ ., data = iris, importance = "impurity", probability = TRUE)
vivi(fit = rf, data = iris, response = "Species") # returns agnostic importance
##              Petal.Length Petal.Width Sepal.Length Sepal.Width
## Petal.Length    0.3240889   7.4730141   7.36318184  5.05911063
## Petal.Width     7.4730141   0.2977507   7.34588033  4.77553746
## Sepal.Length    7.3631818   7.3458803   0.02368022  4.61923203
## Sepal.Width     5.0591106   4.7755375   4.61923203  0.01133608
## attr(,"class")
## [1] "vivid"  "matrix" "array"
vivi(fit = rf, data = iris, response = "Species",
     importanceType = "impurity") # returns selected 'impurity' importance.
##              Petal.Width Petal.Length Sepal.Length Sepal.Width
## Petal.Width    43.171833     7.644195     7.584293    5.628961
## Petal.Length    7.644195    42.449346     7.720061    5.857848
## Sepal.Length    7.584293     7.720061     8.779035    4.940441
## Sepal.Width     5.628961     5.857848     4.940441    1.316285
## attr(,"class")
## [1] "vivid"  "matrix" "array"
f <- lm(Sepal.Length ~ ., data = iris[, -5])
m <- vivi(fit = f, data = iris[, -5], response = "Sepal.Length")
corimp <- abs(cor(iris[, -5])[1, -1])
viviUpdate(m, corimp) # use correlation as importance and reorder
##              Petal.Width Petal.Length Sepal.Width
## Petal.Width    0.8179411    0.0000000   0.0000000
## Petal.Length   0.0000000    0.8717538   0.0000000
## Sepal.Width    0.0000000    0.0000000   0.1175698
## attr(,"class")
## [1] "vivid"  "matrix" "array"

4 ml3, vivid

aq <- na.omit(airquality) * 1.0
# Run an mlr3 ranger model:
library(mlr3)
library(mlr3learners)
library(ranger)
ozonet <- TaskRegr$new(id = "airQ", backend = aq, target = "Ozone")
ozonel <- lrn("regr.ranger", importance = "permutation")
ozonef <- ozonel$train(ozonet)
viv <- vivi(aq, ozonef, "Ozone")
# Calculate Zpath:
zpath <- zPath(viv, .8)
zpath
## [1] "Wind"    "Temp"    "Day"     "Temp"    "Solar.R" "Wind"