ranger, vivid
## https://cran.r-project.org/web/packages/vivid/vivid.pdf
library(vivid)
library(ranger)
dim(airquality)
## [1] 153 6
aq <- na.omit(airquality)
dim(aq)
## [1] 111 6
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
aq <- aq[1:20,]# for speed
rF <- ranger(Ozone ~ ., data = aq, importance = "permutation")
myMat <- vivi(fit = rF, data = aq, response = "Ozone")
myDf <- as.data.frame(myMat)
dim(myDf)
## [1] 25 6
## Variable_1 Variable_2 Value Measure Row Col
## 1 Solar.R Solar.R 2.8394360 Vimp 1 1
## 2 Day Solar.R 0.5026187 Vint 2 1
## 3 Wind Solar.R 0.2188328 Vint 3 1
## 4 Temp Solar.R 0.2685547 Vint 4 1
## 5 Month Solar.R 0.0000000 Vint 5 1
## 6 Solar.R Day 0.5026187 Vint 1 2
# Load in the data:
aq <- na.omit(airquality)
f <- lm(Ozone ~ ., data = aq)
pdpPairs(aq, f, "Ozone")

# Run a ranger model:
library(ranger)
library(MASS)
data(Boston)
head(Boston)
## crim zn indus chas nox rm age dis rad tax ptratio black lstat
## 1 0.00632 18 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98
## 2 0.02731 0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14
## 3 0.02729 0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03
## 4 0.03237 0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94
## 5 0.06905 0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33
## 6 0.02985 0 2.18 0 0.458 6.430 58.7 6.0622 3 222 18.7 394.12 5.21
## medv
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7
Boston1 <- Boston[, c(4:6, 8, 13:14)]
Boston1$chas <- factor(Boston1$chas)
fit <- ranger(medv ~ ., data = Boston1, importance = "permutation")
pdpPairs(Boston1[1:30, ], fit, "medv")

pdpPairs(Boston1[1:30, ], fit, "medv", comboImage = TRUE)

viv <- vivi(Boston1, fit, "medv")
# show top variables only
pdpPairs(Boston1[1:30, ], fit, "medv", comboImage = TRUE, vars = rownames(viv)[1:4])

library(ranger)
rf <- ranger(Species ~ ., data = iris, probability = TRUE)
pdpPairs(iris, rf, "Species") # prediction probs for first class, setosa

pdpPairs(iris, rf, "Species", class = "versicolor") # prediction probs versicolor

# Load in the data:
aq <- na.omit(airquality)
fit <- lm(Ozone ~ ., data = aq)
pdpVars(aq, fit, "Ozone")

# Classification
library(ranger)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
rfClassif <- ranger(Species ~ ., data = iris, probability = TRUE)
pdpVars(iris, rfClassif, "Species", class = 3)

pp <- pdpVars(iris, rfClassif, "Species", class = 2, draw = FALSE)
pp[[1]]

pdpVars(iris, rfClassif, "Species", class = 2, colorVar = "Species")

## Not run:
library(MASS)
library(ranger)
Boston1 <- Boston
Boston1$chas <- factor(Boston1$chas)
rf <- ranger(medv ~ ., data = Boston1)
pdpZen(Boston1[1:30, ], rf, response = "medv", zpath = names(Boston1)[1:4], comboImage = T) # Find the top variables in rf

set.seed(123)
viv <- vivi(Boston1, rf, "medv", nmax = 30) # use 30 rows, for speed
pdpZen(Boston1, rf, response = "medv", zpath = rownames(viv)[1:4], comboImage = T)

zpath <- zPath(viv, cutoff = .2) # find plots whose interaction score exceeds .2
pdpZen(Boston1, rf, response = "medv", zpath = zpath, comboImage = T)

randomForest,
vivid
# Select importance metric
library(randomForest)
rf1 <- randomForest(Ozone~., data = aq, importance = TRUE)
m2 <- vivi(fit = rf1, data = aq, response = 'Ozone',
importanceType = '%IncMSE') # select %IncMSE as the importance measure
viviHeatmap(m2)

library(ranger)
rf <- ranger(Species ~ ., data = iris, importance = "impurity", probability = TRUE)
vivi(fit = rf, data = iris, response = "Species") # returns agnostic importance
## Petal.Length Petal.Width Sepal.Length Sepal.Width
## Petal.Length 0.3240889 7.4730141 7.36318184 5.05911063
## Petal.Width 7.4730141 0.2977507 7.34588033 4.77553746
## Sepal.Length 7.3631818 7.3458803 0.02368022 4.61923203
## Sepal.Width 5.0591106 4.7755375 4.61923203 0.01133608
## attr(,"class")
## [1] "vivid" "matrix" "array"
vivi(fit = rf, data = iris, response = "Species",
importanceType = "impurity") # returns selected 'impurity' importance.
## Petal.Width Petal.Length Sepal.Length Sepal.Width
## Petal.Width 43.171833 7.644195 7.584293 5.628961
## Petal.Length 7.644195 42.449346 7.720061 5.857848
## Sepal.Length 7.584293 7.720061 8.779035 4.940441
## Sepal.Width 5.628961 5.857848 4.940441 1.316285
## attr(,"class")
## [1] "vivid" "matrix" "array"
f <- lm(Sepal.Length ~ ., data = iris[, -5])
m <- vivi(fit = f, data = iris[, -5], response = "Sepal.Length")
corimp <- abs(cor(iris[, -5])[1, -1])
viviUpdate(m, corimp) # use correlation as importance and reorder
## Petal.Width Petal.Length Sepal.Width
## Petal.Width 0.8179411 0.0000000 0.0000000
## Petal.Length 0.0000000 0.8717538 0.0000000
## Sepal.Width 0.0000000 0.0000000 0.1175698
## attr(,"class")
## [1] "vivid" "matrix" "array"