Load libraries
library(tidyverse)
library(fpp3)
library(caret)
library(RANN)
library(mlbench)
library(nnet)
library(earth)
library(party)
library(AppliedPredictiveModeling)
Recreate the simulated data from Exercise 7.2:
library(mlbench)
set.seed(200)
simulated <- mlbench.friedman1(200, sd = 1)
simulated <- cbind(simulated$x, simulated$y)
simulated <- as.data.frame(simulated)
colnames(simulated)[ncol(simulated)] <- "y"
library(randomForest)
library(caret)
model1 <- randomForest(y ~ ., data = simulated,
                       importance = TRUE,
                       ntree = 1000)
rfImp1 <- varImp(model1, scale = FALSE)
print(rfImp1)
## Overall
## V1 8.62743275
## V2 6.27437240
## V3 0.72305459
## V4 7.50258584
## V5 2.13575650
## V6 0.12395003
## V7 0.02927888
## V8 -0.11724317
## V9 -0.10344797
## V10 0.04312556
Did the random forest model significantly use the uninformative predictors (V6 – V10)?
No. The random forest model made little use of the uninformative predictors: the importance scores for V6–V10 are essentially zero (a few are slightly negative), far below those of V1–V5.
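As a quick check (a sketch using the rfImp1 object created above), sorting the importance scores makes the gap between the informative and uninformative predictors explicit:
# Sort the random forest importance scores from largest to smallest
rfImp1[order(rfImp1$Overall, decreasing = TRUE), , drop = FALSE]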
simulated$duplicate1 <- simulated$V1 + rnorm(200) * .1
cor(simulated$duplicate1, simulated$V1)
## [1] 0.9485201
Fit another random forest model to these data. Did the importance score for V1 change? What happens when you add another predictor that is also highly correlated with V1?
The importance score for V1 decreases each time another predictor highly correlated with V1 is added (compare rfImp1 with rfImp2 and rfImp3 below), because the splits that V1 would otherwise capture are shared with its near-duplicates.
# Random Forest Model with the new predictor
model2 <- randomForest(y ~ ., data = simulated,
                       importance = TRUE,
                       ntree = 1000)
# variable importance
rfImp2 <- varImp(model2, scale = FALSE)
print(rfImp2)
## Overall
## V1 6.774034589
## V2 6.426340527
## V3 0.613805379
## V4 7.135941576
## V5 2.135242904
## V6 0.171933358
## V7 0.142238552
## V8 -0.073192083
## V9 -0.098719872
## V10 -0.009701234
## duplicate1 3.084990840
simulated$duplicate2 <- simulated$V1 + rnorm(200) * .1
cor(simulated$duplicate2, simulated$V1)
## [1] 0.9337221
# Random forest model after adding a second correlated predictor
model3 <- randomForest(y ~ ., data = simulated,
                       importance = TRUE,
                       ntree = 1000)
rfImp3 <- varImp(model3, scale = FALSE)
print(rfImp3)
## Overall
## V1 5.908641677
## V2 6.586726939
## V3 0.559845667
## V4 7.373782389
## V5 1.987341138
## V6 0.162417814
## V7 0.038423138
## V8 0.007497423
## V9 -0.001806331
## V10 0.004023755
## duplicate1 2.351543736
## duplicate2 2.305339113
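A small side-by-side table (a sketch built from the three importance objects above) shows how V1's score drops as each correlated duplicate is added:
# Track V1's importance across the three random forest fits
data.frame(model = c("original", "+ duplicate1", "+ duplicate1 & duplicate2"),
           V1_importance = c(rfImp1["V1", "Overall"],
                             rfImp2["V1", "Overall"],
                             rfImp3["V1", "Overall"]))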
Fitting a cforest model (below) and computing both the unconditional and conditional importance measures shows the same overall pattern as the traditional random forest: V1, V2, and V4 remain the most important predictors and the uninformative ones stay near zero. The conditional (modified) scores are noticeably lower than the unconditional ones, especially for V1 and its duplicates, because the conditional measure adjusts for correlation among predictors.
# cforest with conditional inference trees
cforest_model <- cforest(y ~ ., data = simulated,
                         controls = cforest_unbiased(mtry = 3, ntree = 1000))
#importance scores from cforest with traditional importance measure
cforest_traditional <- varimp(cforest_model, conditional = FALSE)
#importance scores from cforest with modified importance measure
cforest_modified <- varimp(cforest_model, conditional = TRUE)
# Print the importance scores
print(cforest_traditional)
## V1 V2 V3 V4 V5 V6
## 4.85650670 4.83873965 0.06427383 5.69238840 1.74353145 0.02667855
## V7 V8 V9 V10 duplicate1 duplicate2
## 0.02181069 -0.02407249 -0.02433254 -0.05178070 2.25686987 1.68311274
print(cforest_modified)
## V1 V2 V3 V4 V5 V6
## 1.780641419 3.587323075 0.100029765 4.247910855 1.172723715 -0.003434175
## V7 V8 V9 V10 duplicate1 duplicate2
## 0.003453134 0.003450580 -0.001294559 -0.011044080 0.856867376 0.453573306
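For easier comparison, the two cforest importance vectors can be placed side by side (a sketch; both vectors share the same predictor names and order):
# Unconditional vs. conditional importance from the same cforest fit
round(data.frame(traditional = cforest_traditional,
                 conditional = cforest_modified), 3)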
The boosted tree and Cubist models (below) show the same general pattern: V1, V2, V4, and V5 (together with V3) dominate the importance rankings, while the uninformative predictors V6–V10 and the correlated duplicates receive comparatively little importance.
Boosted Trees
library(gbm)
gbmGrid <- expand.grid(interaction.depth = seq(1, 7, by = 2),
                       n.trees = seq(100, 1000, by = 50),
                       shrinkage = c(0.01, 0.1),
                       n.minobsinnode = c(5, 10))
set.seed(200)
# Fit a boosted tree model
gbmTune <- train(y ~ ., data = simulated,
                 method = "gbm",
                 tuneGrid = gbmGrid,
                 verbose = FALSE)
# Get variable importance
boosted_importance <- summary(gbmTune, plot = FALSE)
print(boosted_importance)
## var rel.inf
## V4 V4 27.3493418
## V1 V1 23.5768113
## V2 V2 21.6040483
## V5 V5 10.6337328
## V3 V3 9.0076391
## duplicate2 duplicate2 2.0705780
## V6 V6 1.5229452
## duplicate1 duplicate1 1.4176309
## V7 V7 1.0763947
## V10 V10 0.6902218
## V9 V9 0.5874660
## V8 V8 0.4631900
Cubist
library(Cubist)
cubistGrid <- expand.grid(committees = c(1, 10, 20),
                          neighbors = c(0, 5, 9))
set.seed(200)
# Fit a Cubist model
cubist_model <- train(y ~ ., data = simulated,
                      method = "cubist",
                      tuneGrid = cubistGrid,
                      trControl = trainControl(method = "cv", number = 5))
# importance
cubist_importance <- varImp(cubist_model)
print(cubist_importance)
## cubist variable importance
##
## Overall
## V1 100.0000
## V2 93.7984
## V3 78.2946
## V4 74.4186
## V5 50.3876
## V6 23.2558
## duplicate1 7.7519
## duplicate2 3.8760
## V8 3.1008
## V10 0.7752
## V7 0.0000
## V9 0.0000
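A short sketch (using the boosted_importance and cubist_importance objects above; gbm_top and cubist_top are ad hoc names) lines up the top five predictors from each model:
# summary() of the gbm fit returns rows sorted by relative influence;
# varImp() stores its scores in the $importance data frame
gbm_top    <- head(as.character(boosted_importance$var), 5)
cubist_top <- head(rownames(cubist_importance$importance)[
                     order(cubist_importance$importance$Overall, decreasing = TRUE)], 5)
data.frame(rank = 1:5, gbm = gbm_top, cubist = cubist_top)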
Use a simulation to show tree bias with different granularities
library(rpart)
library(partykit)
# Data with different granularities
set.seed(200)
a <- sample(1:10 / 10, 500, replace = TRUE)
b <- sample(1:100 / 100, 500, replace = TRUE)
c <- sample(1:1000 / 1000, 500, replace = TRUE)
d <- sample(1:10000 / 10000, 500, replace = TRUE)
e <- sample(1:100000 / 100000, 500, replace = TRUE)
# Response variable y is a combination of the predictors
y <- a + b + c + d + e
grandata <- data.frame(a, b, c, d, e, y)
head(grandata)
##     a    b     c      d       e       y
## 1 0.6 0.91 0.535 0.0608 0.14553 2.25133
## 2 0.2 0.22 0.564 0.1052 0.85073 1.93993
## 3 0.8 0.13 0.819 0.4126 0.87276 3.03436
## 4 0.7 0.25 0.414 0.0165 0.33877 1.71927
## 5 0.5 0.12 0.497 0.2359 0.50319 1.85609
## 6 1.0 0.86 0.203 0.1302 0.67017 2.86337
# Decision tree model
rpartTree <- rpart(y ~ ., data = grandata)
tree_party <- as.party(rpartTree)
# Plot the tree
plot(tree_party, gp = gpar(fontsize = 7))
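The plot can be hard to read in text form, so a quick numeric check (a sketch using the fitted rpartTree object) counts how often each predictor is used for a split and shows rpart's built-in importance scores:
# Split counts and variable importance for the fitted tree
splits <- rpartTree$frame$var[rpartTree$frame$var != "<leaf>"]
table(as.character(splits))
rpartTree$variable.importance
Even though a through e contribute equally to y, CART-style trees tend to favor the finer-grained predictors (those with more distinct values, here d and e) because they offer many more candidate split points — the selection bias this simulation is designed to expose.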
In stochastic gradient boosting the bagging fraction and learning rate will govern the construction of the trees as they are guided by the gradient. Although the optimal values of these parameters should be obtained through the tuning process, it is helpful to understand how the magnitudes of these parameters affect magnitudes of variable importance. Figure 8.24 provides the variable importance plots for boosting using two extreme values for the bagging fraction (0.1 and 0.9) and the learning rate (0.1 and 0.9) for the solubility data. The left-hand plot has both parameters set to 0.1, and the right-hand plot has both set to 0.9:
The right-hand model concentrates its importance on a few predictors: with a high bagging fraction and learning rate, each tree sees most of the data and each update takes a large step, so the same top predictors are selected repeatedly and the remaining predictors get little chance to contribute. The left-hand model, with a low bagging fraction and learning rate, samples smaller subsets and takes smaller steps, so importance is spread across many more predictors.
The left-hand model would likely be more predictive of other samples: because it spreads importance across more predictors, it is less dependent on a handful of variables, whereas the right-hand model's reliance on a few predictors makes overfitting more likely.
Increasing the interaction depth allows each tree to use more predictors, which spreads the importance across more variables; the importance slope in either plot would therefore flatten, shrinking the gap between the top predictor and the rest.
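The contrast in Figure 8.24 could be reproduced with a minimal sketch like the one below, assuming the solubility data that ships with the AppliedPredictiveModeling package (solTrainXtrans and solTrainY); object names such as solTrain, gbm_low, and gbm_high are ad hoc, and the tree count and depth are illustrative only:
library(gbm)
library(AppliedPredictiveModeling)
data(solubility)  # provides solTrainXtrans (predictors) and solTrainY (response)
solTrain <- cbind(solTrainXtrans, Solubility = solTrainY)
# Left-hand panel: low bagging fraction and low learning rate
gbm_low  <- gbm(Solubility ~ ., data = solTrain, distribution = "gaussian",
                n.trees = 100, interaction.depth = 3,
                shrinkage = 0.1, bag.fraction = 0.1)
# Right-hand panel: high bagging fraction and high learning rate
gbm_high <- gbm(Solubility ~ ., data = solTrain, distribution = "gaussian",
                n.trees = 100, interaction.depth = 3,
                shrinkage = 0.9, bag.fraction = 0.9)
# Compare how concentrated the relative influence is in each fit
head(summary(gbm_low,  plotit = FALSE), 10)
head(summary(gbm_high, plotit = FALSE), 10)
In fits like these, the high-shrinkage, high-bagging-fraction model typically piles most of its relative influence onto the first few predictors, while the low-setting model distributes it more evenly.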
Refer to Exercises 6.3 and 7.5 which describe a chemical manufacturing process. Use the same data imputation, data splitting, and pre-processing steps as before and train several tree-based models:
set.seed(200)
# Data Splitting
data("ChemicalManufacturingProcess")
trainIndex <- createDataPartition(ChemicalManufacturingProcess$Yield, p = 0.8, list = FALSE)
train_data <- ChemicalManufacturingProcess[trainIndex, ]
test_data <- ChemicalManufacturingProcess[-trainIndex, ]
# Imputation and Preprocess
preprocess_data <- preProcess(train_data, method = "knnImpute")
trainData <- predict(preprocess_data, train_data)
testData <- predict(preprocess_data, test_data)
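A quick sanity check (sketch) confirms that the kNN imputation left no missing values in either split; note that knnImpute also centers and scales the columns, which is why the RMSE values reported below are on a standardized scale rather than in raw yield units.
# Count remaining missing values after preprocessing
sum(is.na(trainData))
sum(is.na(testData))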
The Cubist model performs best: it has the lowest RMSE and the highest R-squared on the test set (see the comparison table below).
Random Forest Model
set.seed(200)
rain_forest <- train(Yield ~ ., data = trainData,
                     method = "rf",
                     trControl = trainControl(method = "cv", number = 5),
                     importance = TRUE)
rainforest_pred <- predict(rain_forest, newdata = testData)
rf_result <- postResample(rainforest_pred, testData$Yield)
Cubist Model
set.seed(200)
cubist_model <- train(Yield ~ ., data = trainData,
                      method = "cubist",
                      trControl = trainControl(method = "cv", number = 5))
cubist_pred <- predict(cubist_model, newdata = testData)
c_result <- postResample(cubist_pred, testData$Yield)
Gradient Boosting Machine (GBM) Model
set.seed(200)
gbm_model <- train(Yield ~ ., data = trainData,
                   method = "gbm",
                   trControl = trainControl(method = "cv", number = 5),
                   verbose = FALSE)
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
gbm_pred <- predict(gbm_model, newdata = testData)
gbm_result <- postResample(gbm_pred, testData$Yield)
# Compare performances
performance_results <- rbind(
  RF = rf_result,
  Cubist = c_result,
  GBM = gbm_result)
print(performance_results)
## RMSE Rsquared MAE
## RF 0.7693580 0.5290745 0.5391135
## Cubist 0.6649941 0.6781984 0.5392419
## GBM 0.7250877 0.5792192 0.5375876
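Since all three models were tuned with the same 5-fold cross-validation, their resampling results can also be compared directly (a sketch using caret's resamples(); model_resamples is an ad hoc name):
# Collect the cross-validation resamples from the three tuned models
model_resamples <- resamples(list(RF = rain_forest, Cubist = cubist_model, GBM = gbm_model))
summary(model_resamples)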
ManufacturingProcess32 is the most important predictor in both the random forest and GBM models, while ManufacturingProcess17 ranks first for Cubist (with ManufacturingProcess32 a close second), as shown below. Process variables dominate the top of every list. Because tree-based models can capture interactions and nonlinear relationships that linear models cannot, the rankings differ somewhat from those of the models fit in the earlier exercises.
# Importance for Random Forest
rf_importance <- varImp(rain_forest, scale = FALSE)
print(rf_importance)
## rf variable importance
##
## only 20 most important variables shown (out of 57)
##
## Overall
## ManufacturingProcess32 42.411
## ManufacturingProcess17 13.903
## BiologicalMaterial11 13.171
## ManufacturingProcess31 9.102
## BiologicalMaterial12 8.489
## ManufacturingProcess06 7.346
## ManufacturingProcess39 7.138
## ManufacturingProcess13 6.893
## ManufacturingProcess10 6.787
## BiologicalMaterial03 6.353
## BiologicalMaterial04 6.151
## BiologicalMaterial06 5.781
## ManufacturingProcess19 5.617
## BiologicalMaterial01 5.384
## ManufacturingProcess28 4.848
## ManufacturingProcess09 4.833
## ManufacturingProcess34 4.737
## BiologicalMaterial02 4.613
## ManufacturingProcess20 4.584
## ManufacturingProcess26 4.350
plot(rf_importance, top = 10)
# Importance for GBM
gbm_importance <- varImp(gbm_model, scale = FALSE)
print(gbm_importance)
## gbm variable importance
##
## only 20 most important variables shown (out of 57)
##
## Overall
## ManufacturingProcess32 172.407
## ManufacturingProcess17 29.798
## ManufacturingProcess31 27.164
## BiologicalMaterial12 26.211
## ManufacturingProcess09 17.944
## BiologicalMaterial08 16.353
## BiologicalMaterial09 15.887
## ManufacturingProcess06 15.110
## ManufacturingProcess30 14.934
## ManufacturingProcess27 14.367
## ManufacturingProcess13 13.826
## BiologicalMaterial03 12.122
## ManufacturingProcess05 12.052
## BiologicalMaterial11 11.891
## ManufacturingProcess23 11.490
## ManufacturingProcess34 10.006
## ManufacturingProcess16 9.911
## ManufacturingProcess01 9.675
## ManufacturingProcess14 9.487
## ManufacturingProcess15 9.052
plot(gbm_importance, top = 10)
# Importance for Cubist
cubist_importance <- varImp(cubist_model, scale = FALSE)
print(cubist_importance)
## cubist variable importance
##
## only 20 most important variables shown (out of 57)
##
## Overall
## ManufacturingProcess17 47.0
## ManufacturingProcess32 44.5
## ManufacturingProcess13 20.5
## ManufacturingProcess39 16.5
## ManufacturingProcess04 15.5
## BiologicalMaterial12 14.5
## BiologicalMaterial06 12.0
## ManufacturingProcess26 10.5
## ManufacturingProcess33 10.0
## ManufacturingProcess10 10.0
## BiologicalMaterial02 9.5
## ManufacturingProcess29 9.5
## BiologicalMaterial09 9.5
## ManufacturingProcess09 8.0
## ManufacturingProcess30 8.0
## BiologicalMaterial08 7.0
## ManufacturingProcess01 6.5
## BiologicalMaterial10 6.0
## ManufacturingProcess14 5.5
## ManufacturingProcess21 5.0
plot(cubist_importance, top = 10)
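To see the overlap at a glance, a small helper (a sketch; top10 is an ad hoc function, not part of caret) can line up the ten highest-ranked predictors from each model:
# Extract the ten highest-ranked predictors from a varImp() object
top10 <- function(imp) {
  rownames(imp$importance)[order(imp$importance$Overall, decreasing = TRUE)][1:10]
}
data.frame(RF = top10(rf_importance), GBM = top10(gbm_importance), Cubist = top10(cubist_importance))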
Viewing a single decision tree makes the relationship between the predictors and yield easy to see: both biological and process variables influence yield, and ManufacturingProcess32 again defines the first split, confirming it as the most important predictor.
# Single decision tree
set.seed(200)
singleTree <- rpart(Yield ~ ., data = trainData)
treeParty <- as.party(singleTree)
# Plot the tree
plot(treeParty, main = "Tree with Yield Distribution", gp=gpar(fontsize = 6))
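The terminal-node yield distributions shown in the plot can also be summarized numerically (a sketch using the treeParty object above; Yield here is on the preprocessed, standardized scale):
# Mean preprocessed yield within each terminal node of the single tree
node_ids <- predict(treeParty, newdata = trainData, type = "node")
tapply(trainData$Yield, node_ids, mean)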