library(gt)
library(mlbench)
library(caret)
library(skimr)
library(AppliedPredictiveModeling)
library(rpart)
library(tidyverse)
library(tidymodels)
library(vip)
library(ggthemes)
library(randomForest)
library(gbm)
library(party)
library(Cubist)

# Unscaled variable importance for `m1` (fitted outside this section).
rfImp <- varImp(m1, scale = FALSE)

# Keep the row order of the importance table so the factor levels, and
# therefore the bars in the plot, follow that order.
level <- rownames(rfImp)

rfImp %>%
  rownames_to_column(var = "Variable") %>%
  mutate(Variable = factor(Variable, levels = level, ordered = TRUE)) %>%
  ggplot(aes(x = Overall, y = factor(Variable))) +
  geom_col(fill = "steelblue") +
  theme_fivethirtyeight() +
  labs(
    title = "Importance", subtitle = "Model: Random Forest",
    y = "Variable", x = "Importance"
  )

# Add a predictor that is V1 plus a small amount of Gaussian noise.
# NOTE(review): rnorm(200) assumes `simulated` has 200 rows - confirm
# against where `simulated` is created.
simulated$duplicate2 <- simulated$V1 + rnorm(200) * 0.1
# Random forest of y on every other column of `simulated`, with
# importance measures computed (importance = TRUE) and 100 trees.
m3 <- randomForest(
  y ~ .,
  data = simulated,
  importance = TRUE, ntree = 100
)

# Unscaled variable importance for m3; `level` records the row order that
# the plots use for their factor levels.
rfImp <- varImp(m3, scale = FALSE)
level <- rownames(rfImp)

rfImp %>%
  rownames_to_column(var = "Variable") %>%
  mutate(Variable = factor(Variable, levels = level, ordered = TRUE)) %>%
  ggplot(aes(x = Overall, y = factor(Variable))) +
  geom_col(fill = "steelblue") +
  theme_fivethirtyeight() +
  labs(
    title = "Importance", subtitle = "Model: Random Forest M3",
    y = "Variable", x = "Overall Importance"
  )

# Use the cforest function in the party package to fit a random forest
# model using conditional inference trees. The party package function
# varimp can calculate predictor importance. The conditional argument of
# that function toggles between the traditional importance measure and the
# modified version described in Strobl et al. (2007). Do these importances
# show the same pattern as the traditional random forest model?
m1_c <- cforest(y ~ ., data = simulated)
# Conditional variable importance for the cforest fit `m1_c`.
conImp <- varimp(m1_c, conditional = TRUE)

# Convert the result to a data frame whose single column is named
# "Overall", the column name the plotting pipeline below maps to x.
conImp <- as.data.frame(conImp)
names(conImp) <- "Overall"

# `level` is reused from the preceding importance table so the variables
# are laid out with the same factor levels as the earlier plot.
conImp %>%
  rownames_to_column(var = "Variable") %>%
  mutate(Variable = factor(Variable, levels = level, ordered = TRUE)) %>%
  ggplot(aes(x = Overall, y = factor(Variable))) +
  geom_col(fill = "steelblue") +
  theme_fivethirtyeight() +
  labs(
    title = "Importance (Conditional)",
    subtitle = "Model: Random Forest M1_C",
    y = "Variable", x = "Importance (Conditional)"
  )

# Fix the RNG state before the statements that follow.
set.seed(200)
# Split `simulated` into the outcome column and the predictor columns.
dep <- simulated %>% select(y)
ind <- simulated %>% select(-y)

# Tuning grid: committees 1..10 crossed with neighbors 1, 3, 5, 7, 9.
param.cubist <- expand.grid(
  committees = seq(1, 10, by = 1),
  neighbors = seq(1, 9, by = 2)
)

# 10-fold cross-validation. The argument is spelled out as `number`
# instead of relying on partial matching of `n`.
ctrl.cubist <- trainControl(method = "cv", number = 10)

m1_cubist <- train(
  x = ind, y = dep$y, method = "cubist",
  trControl = ctrl.cubist, tuneGrid = param.cubist, verbose = FALSE
)

# Print the selected tuning parameters; the recorded output follows.
m1_cubist$bestTune
##    committees neighbors
## 25          5         9
# Unscaled variable importance for the final Cubist model chosen by train().
# NOTE: this overwrites the earlier `rfImp` and `level` objects.
rfImp <- varImp(m1_cubist$finalModel, scale = FALSE)
level <- rownames(rfImp)

rfImp %>%
  rownames_to_column(var = "Variable") %>%
  mutate(Variable = factor(Variable, levels = level, ordered = TRUE)) %>%
  ggplot(aes(x = Overall, y = factor(Variable))) +
  geom_col(fill = "steelblue") +
  theme_fivethirtyeight() +
  labs(
    # Subtitle typo corrected ("Modle" -> "Model").
    title = "Importance", subtitle = "Model: m1_cubist",
    y = "Variable", x = "Importance"
  )

# fig8.3
# NOTE(review): stray figure label carried over from the rendered document;
# presumably it refers to the plot above - confirm.