Recreate the simulated data from Exercise 7.2.
# Code provided in exercise
set.seed(200)
simulated <- mlbench.friedman1(200, sd = 1)
simulated <- cbind(simulated$x, simulated$y)
simulated <- as.data.frame(simulated)
colnames(simulated)[ncol(simulated)] <- "y"
Fit a random forest model to all of the predictors, then estimate the variable importance scores. Did the random forest model significantly use the uninformative predictors (V6–V10)?
Answer: Yes, the random forest (RF) model used the uninformative predictors (V6–V10); however, their importance scores are much smaller than those of V1–V5.
# Random forest model of simulated data
set.seed(200)
RFmodel1 <- caret::train(y ~ ., data = simulated, method = 'rf')
# I scaled the importance scores to facilitate comparison among models
RFmodel1_varimp_list <- varImp(RFmodel1, scale = TRUE)
RFmodel1_varimp_df <- RFmodel1_varimp_list[['importance']]
RFmodel1_varimp_df
## Overall
## V1 100.00000
## V2 78.13348
## V3 9.54344
## V4 90.22926
## V5 28.81540
## V6 3.15705
## V7 3.16427
## V8 0.00000
## V9 0.01473
## V10 1.58339
Use the cforest function in the party package to fit a random forest model using conditional inference trees. The party package function varimp can calculate predictor importance. The conditional argument of that function toggles between the traditional importance measure and the modified version described in Strobl et al. (2007). Do these importances show the same pattern as the traditional random forest model?
Answer: As shown in the plot below, the pattern is somewhat different. The importance of V1 decreased by a similar amount in both models. Interestingly, duplicate variable 1 was less important in the conditional RF model than in the traditional RF model, whereas duplicate variable 2 was similarly important in both. Also, V3 is no longer an informative predictor in the conditional RF model.
# Conditional random forest model
set.seed(200)
CFmodel <- caret::train(y ~ ., data = simulated, method = 'cforest')
CFmodel_varimp_list <- varImp(CFmodel, scale = TRUE)
CFmodel_varimp_df <- CFmodel_varimp_list[['importance']]
CFmodel_varimp_df
## Overall
## V1 76.71200
## V2 75.66111
## V3 0.56690
## V4 100.00000
## V5 21.55383
## V6 0.33743
## V7 1.36729
## V8 0.00000
## V9 0.00497
## V10 0.44711
## duplicate1 7.43321
## duplicate2 4.71813
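For reference, the party interface described in the exercise can also be used directly rather than through the caret wrapper above. A minimal sketch (the ntree value is illustrative, not tuned; assumes the simulated data frame already contains duplicate1 and duplicate2):
set.seed(200)
cf_fit <- party::cforest(y ~ ., data = simulated,
                         controls = party::cforest_unbiased(ntree = 100))
# Traditional (unconditional) permutation importance
party::varimp(cf_fit, conditional = FALSE)
# Conditional importance of Strobl et al. (2007); slower, but adjusts for
# correlations among predictors such as V1 and its duplicates
party::varimp(cf_fit, conditional = TRUE)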
# Add variable importance scores from conditional random forest model to
# previous results and update bar plot
CFmodel_varimp_df$model <- rep('Conditional RF', 12)
CFmodel_varimp_df$variable <- rownames(CFmodel_varimp_df)
rownames(CFmodel_varimp_df) <- NULL
CFmodel_varimp_df <- CFmodel_varimp_df %>%
select(model, variable, importance = 'Overall')
# Combine the dataframes
models_varimp_long_df <- bind_rows(RFmodel_varimp_long_df, CFmodel_varimp_df) %>%
select(model, variable, importance)
# Define factors
models_varimp_long_df$variable <- factor(models_varimp_long_df$variable,
levels = rev(c('V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
'duplicate1', 'duplicate2')))
models_varimp_long_df$model <- factor(models_varimp_long_df$model,
levels = rev(c('RF', 'RF + dup var', 'Conditional RF')))
# Bar plot
ggplot(models_varimp_long_df, aes(x = variable, y = importance, fill = model)) +
geom_bar(stat='identity', position = 'dodge') +
scale_fill_manual(values = c('RF' = '#FFC20A',
'RF + dup var' ='#21918C',
'Conditional RF' = '#440154')) +
guides(fill = guide_legend(reverse = TRUE)) +
coord_flip() +
labs(
y = 'scaled importance',
title = 'Comparison of variable importance in different models',
caption = 'RF, random forest.'
) +
theme(
axis.title = element_text(face = 'bold'),
plot.title = element_text(face = 'bold'),
plot.caption = element_text(color = "darkgray", hjust = 0)
)
Repeat this process with different tree models, such as boosted trees and Cubist. Does the same pattern occur?
Answer: As shown in the plot below, the importance of V1 decreased in all models except the Cubist model, and the importance of the duplicate variables varied among models. Specifically, duplicate variable 1 was similarly important in the traditional RF and GBM models, less important in the conditional RF model, and unimportant in the Cubist model. Duplicate variable 2 was similarly important in the traditional and conditional RF models and unimportant in the Cubist model. The effect of adding highly correlated variables on the importance of the other predictors also varied among models; in general, V2 and V4 became more important and V6–V10 less important.
set.seed(200)
GBmodel <- caret::train(y ~ ., data = simulated, method = 'gbm', verbose = FALSE)
GBmodel_varimp_list <- varImp(GBmodel, scale = TRUE)
GBmodel_varimp_df <- GBmodel_varimp_list[['importance']]
GBmodel_varimp_df
## Overall
## V1 49.1153
## V2 64.7276
## V3 28.4695
## V4 100.0000
## V5 33.4554
## V6 1.6777
## V7 1.6087
## V8 0.0000
## V9 0.4432
## V10 1.2362
## duplicate1 25.3773
## duplicate2 9.3412
# Cubist model generated by caret package
set.seed(200)
cubist_model <- caret::train(y ~ ., data = simulated, method = 'cubist')
cubist_model_varimp_list <- varImp(cubist_model, scale = TRUE)
cubist_model_varimp_df <- cubist_model_varimp_list[['importance']]
cubist_model_varimp_df
## Overall
## V1 100.00
## V3 58.87
## V2 77.30
## V4 69.50
## V5 48.23
## V6 0.00
## V7 0.00
## V8 0.00
## V9 0.00
## V10 0.00
## duplicate1 0.00
## duplicate2 0.00
# Add variable importance scores to previous results and update bar plot
# GBM
GBmodel_varimp_df$model <- rep('GBM', 12)
GBmodel_varimp_df$variable <- rownames(GBmodel_varimp_df)
rownames(GBmodel_varimp_df) <- NULL
GBmodel_varimp_df <- GBmodel_varimp_df %>%
select(model, variable, importance = 'Overall')
# Cubist
cubist_model_varimp_df$model <- rep('Cubist', 12)
cubist_model_varimp_df$variable <- rownames(cubist_model_varimp_df)
rownames(cubist_model_varimp_df) <- NULL
cubist_model_varimp_df <- cubist_model_varimp_df %>%
select(model, variable, importance = 'Overall')
# Combine the dataframes
models_varimp_long_df <- bind_rows(models_varimp_long_df,
GBmodel_varimp_df, cubist_model_varimp_df) %>%
select(model, variable, importance)
# Define factors
models_varimp_long_df$variable <- factor(models_varimp_long_df$variable,
levels = rev(c('V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10',
'duplicate1', 'duplicate2')))
models_varimp_long_df$model <- factor(models_varimp_long_df$model,
levels = rev(c('RF', 'RF + dup var', 'Conditional RF', 'GBM', 'Cubist')))
# Bar plot
ggplot(models_varimp_long_df, aes(x = variable, y = importance, fill = model)) +
geom_bar(stat='identity', position = 'dodge') +
scale_fill_manual(values = c('RF' = '#FFC20A',
'RF + dup var' = '#5EC962',
'Conditional RF' = '#21918C',
'GBM' = '#3B528B',
'Cubist' = '#440154')) +
guides(fill = guide_legend(reverse = TRUE)) +
coord_flip() +
labs(
y = 'scaled importance',
title = 'Comparison of variable importance in different models',
caption = 'GBM, gradient boosting machine; RF, random forest.'
) +
theme(
axis.title = element_text(face = 'bold'),
plot.title = element_text(face = 'bold'),
plot.caption = element_text(color = "darkgray", hjust = 0)
)
Use a simulation to show tree bias with different granularities.
Answer: Tree-based models are prone to selection bias: predictors that are more granular (ie, have more distinct values) tend to be selected because they offer more candidate split points. A simple simulation demonstrates this by fitting a tree to a dataset with two predictors: a binary variable (x1; low granularity) that determines the target (y), and a continuous random variable (x2; high granularity) that is unrelated to the target. In the absence of selection bias, we would expect the splits to be based only on x1, because it is known to be predictive of y whereas x2 is not.
The results below show that the tree uses x1 only at the root node, while all other nodes split on x2 even though it is not predictive of the target. This demonstrates that the model is affected by selection bias.
set.seed(200)
n <- 1000 # number of observations
# x1 is predictor of binary outcome (target) y
x1 <- sample(0:1, n, replace = TRUE)
noise <- rnorm(n, mean = 0, sd = 1)
y <- 10 * x1 + noise > 10
# x2 is random continuous variable
x2 <- runif(n, min = 0, max = 1)
simulated_data <- data.frame(x1, x2, y)
# Fit classification model
# Small cp (complexity parameter) value grows a bigger tree
# so we can see how nodes are split
rpart_model <- rpart(y ~ ., data = simulated_data, method = 'class', cp = 0.01)
# Visualize tree
# 'tweak' parameter scales font sizes
rpart.plot(rpart_model, box.palette = 'Blues', tweak = 2)
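To quantify the bias beyond the plot, we can inspect the fitted rpart object created above; a brief check:
# Count how often each predictor is used for a split (terminal nodes are '<leaf>')
table(rpart_model$frame$var[rpart_model$frame$var != '<leaf>'])
# rpart's built-in split-based importance tells the same story: x2 accrues
# substantial importance despite being unrelated to y
rpart_model$variable.importance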
In stochastic gradient boosting, the bagging fraction and learning rate will govern the construction of the trees as they are guided by the gradient. Although the optimal values of these parameters should be obtained through the tuning process, it is helpful to understand how the magnitudes of these parameters affect the magnitudes of variable importance.
Figure 8.24 provides the variable importance plots for boosting using two extreme values for the bagging fraction (0.1 and 0.9) and the learning rate (0.1 and 0.9) for the solubility data.
Answer: High bagging fraction and high shrinkage (learning rate) make the algorithm use more of the training data and be “greedier” in its selection of predictors. Together, this makes the model converge faster and identify the strongest predictors (ie, “low-hanging fruit”). When these parameters are relaxed, the algorithm converges more slowly, allowing it to explore the gradient of the loss function in more detail and identify weaker predictors that may collectively have better performance.
Answer: I would expect a model with low shrinkage (learning rate) to be more predictive because it would be less prone to overfitting the training data and therefore more generalizable to new data.
Answer: Increasing the interaction depth (ie, deeper trees) would be expected to decrease the slope of the predictor importance plot, so that more predictors would have non-zero importance score. This would most affect the model with low bagging fraction and low shrinkage (learning rate), which converges more slowly and evaluates more predictors (see part (a)).
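To illustrate part (a), here is a minimal sketch of how the two extreme settings in Figure 8.24 might be reproduced on the solubility data using the gbm package directly (the n.trees and interaction.depth values are illustrative, not those used for the figure):
library(AppliedPredictiveModeling)
library(gbm)
data(solubility)  # provides solTrainXtrans and solTrainY
sol_train <- cbind(solTrainXtrans, Solubility = solTrainY)
set.seed(100)
gbm_low <- gbm(Solubility ~ ., data = sol_train, distribution = 'gaussian',
               n.trees = 100, interaction.depth = 3,
               shrinkage = 0.1, bag.fraction = 0.1)
set.seed(100)
gbm_high <- gbm(Solubility ~ ., data = sol_train, distribution = 'gaussian',
                n.trees = 100, interaction.depth = 3,
                shrinkage = 0.9, bag.fraction = 0.9)
# Relative influence: the 0.9/0.9 model concentrates importance on fewer predictors
head(summary(gbm_low, plotit = FALSE), 10)
head(summary(gbm_high, plotit = FALSE), 10)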
The data for this exercise were pre-processed and modeled using partial least squares (PLS) in Exercise 6.3 (Homework 7) and a nonlinear model (SVM) in Exercise 7.5 (Homework 8). Missing data were imputed using multivariate imputation by chained equations (MICE) with predictive mean matching. This method generates multiple imputed datasets, each of which is used for modeling, followed by integration of performance metrics using Rubin’s rules.1
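The imputation object and train/test split referenced in the code below (CMP_imputations, n_impute_datasets, train_idx, test_df) were created in the earlier homework. A minimal sketch of how they could be set up is shown here; the number of imputations, the split proportion, and the omitted column renaming are assumptions:
library(AppliedPredictiveModeling)
library(mice)
data(ChemicalManufacturingProcess)
# Multiple imputation with predictive mean matching (m imputed datasets)
n_impute_datasets <- 5  # assumed value
set.seed(144)
CMP_imputations <- mice(ChemicalManufacturingProcess, m = n_impute_datasets,
                        method = 'pmm', printFlag = FALSE)
# Train/test split on row indices (70/30 split is an assumption)
set.seed(144)
train_idx <- caret::createDataPartition(ChemicalManufacturingProcess$Yield,
                                        p = 0.7, list = FALSE)
test_df <- ChemicalManufacturingProcess[-train_idx, ]
# Column renaming (eg, ManufacturingProcess32 -> Process32) from the earlier
# homework is omitted here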
Which tree-based regression model gives the optimal resampling and test set performance?
Answer:
I trained and compared the performance of five tree-based regression models (single tree, bagged trees, random forest, boosted trees, and Cubist).
# Define cross-validation method
ctrl <- trainControl(method = "cv", number = 10)
# Single tree
# ------------
set.seed(144)
# Create list to store models
single_models <- list()
# Create matrix to store predictions for all models
predictions_mat <- matrix(NA, nrow = nrow(test_df), ncol = n_impute_datasets)
# The line below is commented out because I found that tuning for the
# complexity parameter resulted in the default value as the optimal value.
# So I tuned for tree depth instead, which does not need a tuneGrid
# singleGrid <- expand.grid(cp = c(0.001, 0.01, 0.1))
# Loop through the imputed datasets, fit model and make predictions for each
for (i in 1 : n_impute_datasets) {
# Extract the imputed dataset
full_imputed <- mice::complete(CMP_imputations, i)
# Subset training and test rows
train_imputed <- full_imputed[train_idx, ]
test_imputed <- full_imputed[-train_idx, ]
# Fit model using training data
model <- train(
Yield ~ ., data = train_imputed,
# method = 'rpart', # tune complexity
# tuneGrid = singleGrid,
method = 'rpart2', # tune depth
tuneLength = 5,
trControl = ctrl
)
# Make predictions using test data
predictions_mat[, i] <- predict(model, newdata = test_imputed)
# Store the model
single_models[[i]] <- model
}
# Average the predictions and then calculate performance metrics
mean_predictions <- rowMeans(predictions_mat)
single_performance <- caret::postResample(pred = mean_predictions, obs = test_imputed$Yield)
# Bagged trees
# ------------
set.seed(144)
# Create matrix to store predictions for all models
predictions_mat <- matrix(NA, nrow = nrow(test_df), ncol = n_impute_datasets)
# Loop through the imputed datasets, fit model and make predictions for each
for (i in 1 : n_impute_datasets) {
# Extract the imputed dataset
full_imputed <- mice::complete(CMP_imputations, i)
# Subset training and test rows
train_imputed <- full_imputed[train_idx, ]
test_imputed <- full_imputed[-train_idx, ]
# Fit model using training data
model <- train(
Yield ~ ., data = train_imputed,
method = 'treebag',
trControl = ctrl
)
# Make predictions using test data
predictions_mat[, i] <- predict(model, newdata = test_imputed)
}
# Average the predictions and then calculate performance metrics
mean_predictions <- rowMeans(predictions_mat)
bagged_performance <- caret::postResample(pred = mean_predictions, obs = test_imputed$Yield)
# Random forest
# -------------
# Note: The default values for number of trees (ntree = 500) and
# number of variables randomly sampled at each split (mtry = 7) are
# close to the "rule of thumb" initial values for these parameters.
# The ChemicalManufacturingProcess dataset has 57 features, so
# ntree = 57 x 10 = 570 and mtry = sqrt(57) = 7.6
set.seed(144)
# Create matrix to store predictions for all models
predictions_mat <- matrix(NA, nrow = nrow(test_df), ncol = n_impute_datasets)
# Loop through the imputed datasets, fit model and make predictions for each
for (i in 1 : n_impute_datasets) {
# Extract the imputed dataset
full_imputed <- mice::complete(CMP_imputations, i)
# Subset training and test rows
train_imputed <- full_imputed[train_idx, ]
test_imputed <- full_imputed[-train_idx, ]
# Fit model using training data
model <- train(
Yield ~ ., data = train_imputed,
method = 'rf',
trControl = ctrl
)
# Make predictions using test data
predictions_mat[, i] <- predict(model, newdata = test_imputed)
}
# Average the predictions and then calculate performance metrics
mean_predictions <- rowMeans(predictions_mat)
rf_performance <- caret::postResample(pred = mean_predictions, obs = test_imputed$Yield)
# Boosted trees
# -------------
set.seed(144)
# Create matrix to store predictions for all models
predictions_mat <- matrix(NA, nrow = nrow(test_df), ncol = n_impute_datasets)
# Loop through the imputed datasets, fit model and make predictions for each
for (i in 1 : n_impute_datasets) {
# Extract the imputed dataset
full_imputed <- mice::complete(CMP_imputations, i)
# Subset training and test rows
train_imputed <- full_imputed[train_idx, ]
test_imputed <- full_imputed[-train_idx, ]
# Fit model using training data
model <- train(
Yield ~ ., data = train_imputed,
method = 'gbm',
verbose = FALSE,
trControl = ctrl
)
# Make predictions using test data
predictions_mat[, i] <- predict(model, newdata = test_imputed)
}
# Average the predictions and then calculate performance metrics
mean_predictions <- rowMeans(predictions_mat)
gbm_performance <- caret::postResample(pred = mean_predictions, obs = test_imputed$Yield)
# Cubist
# ------
set.seed(144)
# Create list to store models
cubist_models <- list()
# Create matrix to store predictions for all models
predictions_mat <- matrix(NA, nrow = nrow(test_df), ncol = n_impute_datasets)
# Loop through the imputed datasets, fit model and make predictions for each
for (i in 1 : n_impute_datasets) {
# Extract the imputed dataset
full_imputed <- mice::complete(CMP_imputations, i)
# Subset training and test rows
train_imputed <- full_imputed[train_idx, ]
test_imputed <- full_imputed[-train_idx, ]
# Fit model using training data
model <- train(
Yield ~ ., data = train_imputed,
method = 'cubist',
trControl = ctrl
)
# Make predictions using test data
predictions_mat[, i] <- predict(model, newdata = test_imputed)
# Store the model
cubist_models[[i]] <- model
}
# Average the predictions and then calculate performance metrics
mean_predictions <- rowMeans(predictions_mat)
cubist_performance <- caret::postResample(pred = mean_predictions, obs = test_imputed$Yield)
Of the five models, Cubist had the lowest RMSE and MAE and the highest \(R^2\), which indicates that it is the best fit for the data.
# Combine performance metrics from all models into dataframe
cmp_model_performance_df <- as.data.frame(rbind(single_performance,
bagged_performance, rf_performance, gbm_performance, cubist_performance))
cmp_model_performance_df$model <- c('Single tree', 'Bagged', 'RF', 'GBM', 'Cubist')
rownames(cmp_model_performance_df) <- NULL
cmp_model_performance_df %>%
select(model, RMSE, Rsquared, MAE) %>%
arrange(RMSE)
## model RMSE Rsquared MAE
## 1 Cubist 1.026 0.7040 0.7507
## 2 GBM 1.244 0.5637 0.9220
## 3 RF 1.306 0.5269 0.9660
## 4 Bagged 1.382 0.4612 1.0196
## 5 Single tree 1.579 0.3261 1.2003
Although not part of the exercise, it is interesting to note that the performance of the Cubist model is very close to that of the SVM model (Homework 8; \(RMSE=1.025\), \(R^2=0.71\), \(MAE=0.84\)), which was previously the best model. However, an important difference between these two models is that the Cubist model is more explainable (since it is rule based). This may affect the choice of the final model.
Which predictors are most important in the optimal tree-based regression model? Do either the biological or process variables dominate the list? How do the top 10 important predictors compare to the top 10 predictors from the optimal linear and nonlinear models?
Answer:
Because the MICE imputation process generates multiple imputed datasets, each of which has an associated model, I compared variable importance for each model.
The top 10 important variables in all models tended to include more manufacturing process predictors than biological material predictors. This differed from the previous optimal linear model (PLS; Homework 7) and optimal nonlinear model (SVM; Homework 8), which had a more balanced number of process and material variables.
In four of the five Cubist models, the most important predictor was Process32. This was also the most important predictor in all PLS and all SVM models, suggesting that it is indeed the most important (and mutable) predictor of product yield.
Besides Process32, there were no common predictors among the top four predictors of the PLS, SVM, and Cubist models. In the PLS models, other important predictors included Process36, Process17, Material06, and Material02 (in order of importance). In the SVM models, other important predictors included Material06, Material02, and Material03 (in order of importance). In the Cubist models, other important predictors included Process13 (four models) and Material04 (three models). Thus, no conclusions can be drawn from these models about which other predictors may be important, although Material06 and Material02 ranked in the top four predictors for two of the three model types (PLS and SVM). However, because biological material predictors cannot be changed (per the exercise instructions), these variables would only be useful for quality control of the raw material rather than for improving product yield.
varImp(cubist_models[[1]])
## cubist variable importance
##
## only 20 most important variables shown (out of 56)
##
## Overall
## Process32 100.0
## Process13 50.4
## Process09 44.3
## Process29 41.7
## Material06 39.1
## Process04 34.8
## Process33 33.0
## Material10 31.3
## Material04 28.7
## Material08 27.0
## Process01 20.9
## Material12 20.9
## Material03 19.1
## Process10 15.7
## Process27 15.7
## Process17 15.7
## Process39 15.7
## Material09 15.7
## Process45 12.2
## Process25 12.2
varImp(cubist_models[[2]])
## cubist variable importance
##
## only 20 most important variables shown (out of 56)
##
## Overall
## Material04 100
## Process32 100
## Material10 100
## Process04 100
## Process15 100
## Process09 100
## Process13 100
## Process31 0
## Process25 0
## Process38 0
## Process05 0
## Material06 0
## Process45 0
## Process28 0
## Process40 0
## Material09 0
## Process29 0
## Process24 0
## Process22 0
## Process44 0
varImp(cubist_models[[3]])
## cubist variable importance
##
## only 20 most important variables shown (out of 56)
##
## Overall
## Process32 100.00
## Process04 32.54
## Process13 30.95
## Material04 30.16
## Process09 30.16
## Material02 26.98
## Material03 23.02
## Process33 23.02
## Material10 23.02
## Process31 21.43
## Process29 19.84
## Process43 18.25
## Material01 18.25
## Process15 17.46
## Process17 15.08
## Process11 12.70
## Process37 11.90
## Material09 11.11
## Process34 10.32
## Process01 7.94
varImp(cubist_models[[4]])
## cubist variable importance
##
## only 20 most important variables shown (out of 56)
##
## Overall
## Process32 100.00
## Process13 51.72
## Material04 35.34
## Material10 35.34
## Process17 25.86
## Material03 25.86
## Process04 24.14
## Process09 23.28
## Process27 21.55
## Process15 20.69
## Process33 20.69
## Material06 15.52
## Material11 14.66
## Material02 14.66
## Material09 13.79
## Process42 12.93
## Material12 12.93
## Process29 12.93
## Process01 12.07
## Process26 9.48
varImp(cubist_models[[5]])
## cubist variable importance
##
## only 20 most important variables shown (out of 56)
##
## Overall
## Process32 100.0
## Process13 58.8
## Process33 47.4
## Process09 40.2
## Material12 33.0
## Process04 33.0
## Material04 32.0
## Material11 27.8
## Material06 25.8
## Material03 23.7
## Material10 22.7
## Material02 22.7
## Process25 22.7
## Process15 21.6
## Process11 16.5
## Process28 15.5
## Process17 15.5
## Material05 15.5
## Material08 13.4
## Process39 13.4
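A compact way to summarize the five listings above is to tally how often each predictor appears in a model's top 10; a brief sketch (the helper code is illustrative):
# For each Cubist model, extract the 10 predictors with the highest importance,
# then count how often each predictor appears across the five models
top10_by_model <- lapply(cubist_models, function(m) {
  imp <- varImp(m)$importance
  head(rownames(imp)[order(imp$Overall, decreasing = TRUE)], 10)
})
sort(table(unlist(top10_by_model)), decreasing = TRUE)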
Plot the optimal single tree with the distribution of yield in the terminal nodes. Does this view of the data provide additional knowledge about the biological or process predictors and their relationship with yield?
Answer:
Because the MICE imputation process generates multiple imputed datasets, each of which has an associated model, I plotted each of the five single-tree models. All five trees use Process32 to split the data at the root node, which is consistent with it being the most important predictor of product yield (see part (b)). Of the trees with another level, three subsequently split the data on Material11 and two split on Process31, which suggests that these are also important predictors of product yield. However, these variables were not among the most important predictors in the other models (PLS, SVM, Cubist; part (b)). No further insights about the relationship between the predictors and Yield are possible from these single trees, because they are not very deep.
# Note that during model development (Exercise 8.7(a)),
# caret compared several trees. Here, I examine the best ('final') model
(single_model1 <- single_models[[1]]$finalModel)
## n= 124
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 124 411.80 40.19
## 2) Process32< 159.5 73 113.70 39.15
## 4) Material11< 145.6 39 48.03 38.53 *
## 5) Material11>=145.6 34 34.11 39.85 *
## 3) Process32>=159.5 51 102.90 41.69
## 6) Process31>=70.95 21 27.41 40.96 *
## 7) Process31< 70.95 30 56.24 42.21 *
rpart.plot(single_model1, box.palette = 'Blues', tweak = 1.2)
(single_model2 <- single_models[[2]]$finalModel)
## n= 124
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 124 411.8 40.19
## 2) Process32< 159.5 73 113.7 39.15 *
## 3) Process32>=159.5 51 102.9 41.69 *
rpart.plot(single_model2, box.palette = 'Blues', tweak = 1.2)
(single_model3 <- single_models[[3]]$finalModel)
## n= 124
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 124 411.8 40.19
## 2) Process32< 159.5 73 113.7 39.15 *
## 3) Process32>=159.5 51 102.9 41.69 *
rpart.plot(single_model3, box.palette = 'Blues', tweak = 1.2)
(single_model4 <- single_models[[4]]$finalModel)
## n= 124
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 124 411.80 40.19
## 2) Process32< 159.5 73 113.70 39.15
## 4) Material11< 145.6 39 48.03 38.53 *
## 5) Material11>=145.6 34 34.11 39.85 *
## 3) Process32>=159.5 51 102.90 41.69
## 6) Process30< 9.5 37 59.75 41.29 *
## 7) Process30>=9.5 14 20.79 42.77 *
rpart.plot(single_model4, box.palette = 'Blues', tweak = 1.2)
(single_model5 <- single_models[[5]]$finalModel)
## n= 124
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 124 411.80 40.19
## 2) Process32< 159.5 73 113.70 39.15
## 4) Material11< 145.6 39 48.03 38.53 *
## 5) Material11>=145.6 34 34.11 39.85 *
## 3) Process32>=159.5 51 102.90 41.69
## 6) Process31>=70.95 21 27.41 40.96 *
## 7) Process31< 70.95 30 56.24 42.21 *
rpart.plot(single_model5, box.palette = 'Blues', tweak = 1.2)
pander::pander(sessionInfo())
R version 4.5.2 (2025-10-31)
Platform: aarch64-apple-darwin20
locale: en_US.UTF-8||en_US.UTF-8||en_US.UTF-8||C||en_US.UTF-8||en_US.UTF-8
attached base packages: stats4, grid, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: pander(v.0.6.6), Cubist(v.0.6.0), gbm(v.2.2.3), partykit(v.1.2-24), libcoin(v.1.0-10), party(v.1.3-20), strucchange(v.1.5-4), sandwich(v.3.1-1), zoo(v.1.8-15), modeltools(v.0.2-24), mvtnorm(v.1.3-3), randomForest(v.4.7-1.2), ipred(v.0.9-15), rpart.plot(v.3.1.4), rpart(v.4.1.24), caret(v.7.0-1), lattice(v.0.22-7), mlbench(v.2.1-7), mice(v.3.19.0), lubridate(v.1.9.5), forcats(v.1.0.1), stringr(v.1.6.0), dplyr(v.1.2.0), purrr(v.1.2.1), readr(v.2.1.6), tidyr(v.1.3.2), tibble(v.3.3.1), ggplot2(v.4.0.2), tidyverse(v.2.0.0) and AppliedPredictiveModeling(v.1.1-7)
loaded via a namespace (and not attached): Rdpack(v.2.6.5), pROC(v.1.19.0.1), rlang(v.1.1.7), magrittr(v.2.0.4), multcomp(v.1.4-30), otel(v.0.2.0), e1071(v.1.7-17), matrixStats(v.1.5.0), compiler(v.4.5.2), vctrs(v.0.7.2), reshape2(v.1.4.5), pkgconfig(v.2.0.3), shape(v.1.4.6.1), fastmap(v.1.2.0), backports(v.1.5.0), labeling(v.0.4.3), inum(v.1.0-5), rmarkdown(v.2.30), prodlim(v.2025.04.28), tzdb(v.0.5.0), nloptr(v.2.2.1), xfun(v.0.56), glmnet(v.4.1-10), jomo(v.2.7-6), cachem(v.1.1.0), jsonlite(v.2.0.0), recipes(v.1.3.1), pan(v.1.9), broom(v.1.0.12), parallel(v.4.5.2), cluster(v.2.1.8.1), R6(v.2.6.1), CORElearn(v.1.57.3.1), coin(v.1.4-3), bslib(v.0.10.0), stringi(v.1.8.7), RColorBrewer(v.1.1-3), parallelly(v.1.46.1), boot(v.1.3-32), jquerylib(v.0.1.4), Rcpp(v.1.1.1), iterators(v.1.0.14), knitr(v.1.51), future.apply(v.1.20.1), Matrix(v.1.7-4), splines(v.4.5.2), nnet(v.7.3-20), timechange(v.0.4.0), tidyselect(v.1.2.1), rstudioapi(v.0.18.0), yaml(v.2.3.12), timeDate(v.4052.112), codetools(v.0.2-20), listenv(v.0.10.0), plyr(v.1.8.9), withr(v.3.0.2), S7(v.0.2.1), evaluate(v.1.0.5), future(v.1.69.0), survival(v.3.8-6), proxy(v.0.4-29), pillar(v.1.11.1), foreach(v.1.5.2), reformulas(v.0.4.3.1), ellipse(v.0.5.0), generics(v.0.1.4), hms(v.1.1.4), scales(v.1.4.0), minqa(v.1.2.8), globals(v.0.18.0), class(v.7.3-23), glue(v.1.8.0), tools(v.4.5.2), data.table(v.1.18.2.1), lme4(v.1.1-38), ModelMetrics(v.1.2.2.2), gower(v.1.0.2), plotrix(v.3.8-14), rbibutils(v.2.4.1), nlme(v.3.1-168), Formula(v.1.2-5), cli(v.3.6.5), lava(v.1.8.2), gtable(v.0.3.6), sass(v.0.4.10), digest(v.0.6.39), TH.data(v.1.1-5), farver(v.2.1.2), htmltools(v.0.5.9), lifecycle(v.1.0.5), hardhat(v.1.4.2), mitml(v.0.4-5) and MASS(v.7.3-65)