Load libraries

library(tidyverse)
library(fpp3)
library(caret)
library(RANN)
library(mlbench)
library(nnet)
library(earth)
library(party)
library(AppliedPredictiveModeling)

Exercise 8.1

Recreate the simulated data from Exercise 7.2:

library(mlbench)
set.seed(200)
simulated <- mlbench.friedman1(200, sd = 1)
simulated <- cbind(simulated$x, simulated$y)
simulated <- as.data.frame(simulated)
colnames(simulated)[ncol(simulated)] <- "y"
  a. Fit a random forest model to all of the predictors, then estimate the variable importance scores:
library(randomForest)
library(caret)
model1 <- randomForest(y ~ ., data = simulated,
                      importance = TRUE,
                      ntree = 1000)
rfImp1 <- varImp(model1, scale = FALSE)

print(rfImp1)
##         Overall
## V1   8.62743275
## V2   6.27437240
## V3   0.72305459
## V4   7.50258584
## V5   2.13575650
## V6   0.12395003
## V7   0.02927888
## V8  -0.11724317
## V9  -0.10344797
## V10  0.04312556

Did the random forest model significantly use the uninformative predictors (V6 – V10)?

No; the random forest model made little use of the uninformative predictors. The importance scores for V6 through V10 are essentially zero (a few are even slightly negative), while the informative predictors V1 through V5 score noticeably higher.
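To make the gap explicit, the scores can be sorted; a minimal check, assuming rfImp1 is the data frame returned by varImp() above.

# Sort importance scores: V1-V5 sit at the top, V6-V10 near (or below) zero
rfImp1[order(rfImp1$Overall, decreasing = TRUE), , drop = FALSE]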

  b. Now add an additional predictor that is highly correlated with one of the informative predictors. For example:
simulated$duplicate1 <- simulated$V1 + rnorm(200) * .1
cor(simulated$duplicate1, simulated$V1)
## [1] 0.9485201

Fit another random forest model to these data. Did the importance score for V1 change? What happens when you add another predictor that is also highly correlated with V1?

The importance score for V1 gets smaller each time another highly correlated predictor is added, because the trees split the credit for V1's signal between it and its near-duplicates.

# Random Forest Model with the new predictor
model2 <- randomForest(y ~ ., data = simulated, 
                       importance = TRUE, 
                       ntree = 1000)
# variable importance
rfImp2 <- varImp(model2, scale = FALSE)
print(rfImp2)
##                 Overall
## V1          6.774034589
## V2          6.426340527
## V3          0.613805379
## V4          7.135941576
## V5          2.135242904
## V6          0.171933358
## V7          0.142238552
## V8         -0.073192083
## V9         -0.098719872
## V10        -0.009701234
## duplicate1  3.084990840
simulated$duplicate2 <- simulated$V1 + rnorm(200) * .1
cor(simulated$duplicate2, simulated$V1)
## [1] 0.9337221
# Random forest model after adding a second correlated predictor
model3 <- randomForest(y ~ ., data = simulated, 
                       importance = TRUE, 
                       ntree = 1000)
rfImp3 <- varImp(model3, scale = FALSE)
print(rfImp3)
##                 Overall
## V1          5.908641677
## V2          6.586726939
## V3          0.559845667
## V4          7.373782389
## V5          1.987341138
## V6          0.162417814
## V7          0.038423138
## V8          0.007497423
## V9         -0.001806331
## V10         0.004023755
## duplicate1  2.351543736
## duplicate2  2.305339113
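To see the dilution directly, V1's score can be tracked across the three fits; a small sketch assuming rfImp1, rfImp2, and rfImp3 are the importance data frames computed above.

# How V1's importance shrinks as correlated duplicates are added
data.frame(fit = c("no duplicates", "one duplicate", "two duplicates"),
           V1_importance = c(rfImp1["V1", "Overall"],
                             rfImp2["V1", "Overall"],
                             rfImp3["V1", "Overall"]))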
  c. Use the cforest function in the party package to fit a random forest model using conditional inference trees. The party package function varimp can calculate predictor importance. The conditional argument of that function toggles between the traditional importance measure and the modified version described in Strobl et al. (2007). Do these importances show the same pattern as the traditional random forest model?

The cforest importances show the same overall pattern as the traditional random forest model: V1, V2, and V4 are the most important predictors. The difference is that the conditional (modified) importance scores are smaller than the traditional ones, especially for V1 and the two correlated duplicates.

# cforest with conditional inference trees
cforest_model <- cforest(y ~ ., data = simulated,
                         controls = cforest_unbiased(mtry = 3, ntree = 1000))

#importance scores from cforest with traditional importance measure
cforest_traditional <- varimp(cforest_model, conditional = FALSE)
#importance scores from cforest with modified importance measure
cforest_modified <- varimp(cforest_model, conditional = TRUE)

# Print the importance scores
print(cforest_traditional)
##          V1          V2          V3          V4          V5          V6 
##  4.85650670  4.83873965  0.06427383  5.69238840  1.74353145  0.02667855 
##          V7          V8          V9         V10  duplicate1  duplicate2 
##  0.02181069 -0.02407249 -0.02433254 -0.05178070  2.25686987  1.68311274
print(cforest_modified)
##           V1           V2           V3           V4           V5           V6 
##  1.780641419  3.587323075  0.100029765  4.247910855  1.172723715 -0.003434175 
##           V7           V8           V9          V10   duplicate1   duplicate2 
##  0.003453134  0.003450580 -0.001294559 -0.011044080  0.856867376  0.453573306
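Placing the two measures side by side makes the comparison easier; a short sketch assuming the two vectors computed above.

# Traditional vs. conditional importance for each predictor
cbind(traditional = cforest_traditional,
      conditional = cforest_modified)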
  d. Repeat this process with different tree models, such as boosted trees and Cubist. Does the same pattern occur?

Yes, the same pattern occurs: the boosted tree and Cubist models also rank V1, V2, V4, and V5 among their most important predictors, with the uninformative predictors near the bottom (a quick check of the top predictors follows the Cubist output below).

Boosted Trees

library(gbm)
gbmGrid <- expand.grid(interaction.depth = seq(1, 7, by = 2),
                       n.trees = seq(100, 1000, by = 50),
                       shrinkage = c(0.01, 0.1),
                       n.minobsinnode = c(5, 10))
set.seed(200)
# Fit a boosted tree model
gbmTune <- train(y ~ ., data = simulated, 
                 method = "gbm",
                 tuneGrid = gbmGrid,
                 verbose = FALSE)

# Get variable importance
boosted_importance <- summary(gbmTune, plotit = FALSE)
print(boosted_importance)
##                   var    rel.inf
## V4                 V4 27.3493418
## V1                 V1 23.5768113
## V2                 V2 21.6040483
## V5                 V5 10.6337328
## V3                 V3  9.0076391
## duplicate2 duplicate2  2.0705780
## V6                 V6  1.5229452
## duplicate1 duplicate1  1.4176309
## V7                 V7  1.0763947
## V10               V10  0.6902218
## V9                 V9  0.5874660
## V8                 V8  0.4631900

Cubist

library(Cubist)

cubistGrid <- expand.grid(committees = c(1, 10, 20),
                          neighbors = c(0, 5, 9))
set.seed(200)

# Fit a Cubist model
cubist_model<- train(y ~ ., data = simulated, 
                    method = "cubist",
                    tuneGrid = cubistGrid,
                    trControl = trainControl(method = "cv", number = 5))

# importance
cubist_importance <- varImp(cubist_model)
print(cubist_importance)
## cubist variable importance
## 
##             Overall
## V1         100.0000
## V2          93.7984
## V3          78.2946
## V4          74.4186
## V5          50.3876
## V6          23.2558
## duplicate1   7.7519
## duplicate2   3.8760
## V8           3.1008
## V10          0.7752
## V7           0.0000
## V9           0.0000
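As a quick check of the shared pattern, the top predictors from each model can be pulled out directly; a sketch assuming boosted_importance is the data frame returned by summary() and cubist_importance is the varImp() object from above.

# Top five predictors from the boosted tree model
head(as.character(boosted_importance$var), 5)
# Top five predictors from the Cubist model
cubist_scores <- cubist_importance$importance
head(rownames(cubist_scores)[order(cubist_scores$Overall, decreasing = TRUE)], 5)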

Exercise 8.2

Use a simulation to show tree bias with different granularities

library(rpart)
library(partykit)
# Data with different granularities
set.seed(200)
a <- sample(1:10 / 10, 500, replace = TRUE)          # 10 distinct values
b <- sample(1:100 / 100, 500, replace = TRUE)        # 100 distinct values
c <- sample(1:1000 / 1000, 500, replace = TRUE)      # 1,000 distinct values
d <- sample(1:10000 / 10000, 500, replace = TRUE)    # 10,000 distinct values
e <- sample(1:100000 / 100000, 500, replace = TRUE)  # 100,000 distinct values

# Response variable y is a combination of the predictors
y <- a + b + c + d + e
grandata <- data.frame(a, b, c, d, e, y)
head(grandata)
##       a    b     c      d       e       y
## 1   0.6 0.91 0.535 0.0608 0.14553 2.25133
## 2   0.2 0.22 0.564 0.1052 0.85073 1.93993
## 3   0.8 0.13 0.819 0.4126 0.87276 3.03436
## 4   0.7 0.25 0.414 0.0165 0.33877 1.71927
## 5   0.5 0.12 0.497 0.2359 0.50319 1.85609
## 6   1.0 0.86 0.203 0.1302 0.67017 2.86337
# Decision tree model
rpartTree <- rpart(y ~ ., data = grandata)
tree_party <- as.party(rpartTree)

# Plot the tree 
plot(tree_party, gp = gpar(fontsize = 7))
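Even though every predictor contributes equally to y, a single regression tree tends to split first on the finer-grained predictors simply because they offer many more candidate split points. A rough way to quantify that bias, assuming the same generating scheme as above and a hypothetical helper first_split(), is to repeat the simulation and tally which variable is chosen at the root:

# Tally the root-split variable over repeated simulations (illustrative sketch)
first_split <- function(n = 500) {
  a <- sample(1:10 / 10, n, replace = TRUE)
  b <- sample(1:100 / 100, n, replace = TRUE)
  c <- sample(1:1000 / 1000, n, replace = TRUE)
  d <- sample(1:10000 / 10000, n, replace = TRUE)
  e <- sample(1:100000 / 100000, n, replace = TRUE)
  dat <- data.frame(a, b, c, d, e, y = a + b + c + d + e)
  fit <- rpart(y ~ ., data = dat)
  as.character(fit$frame$var[1])  # variable used at the root node
}
set.seed(200)
table(replicate(50, first_split()))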

Exercise 8.3

In stochastic gradient boosting the bagging fraction and learning rate will govern the construction of the trees as they are guided by the gradient. Although the optimal values of these parameters should be obtained through the tuning process, it is helpful to understand how the magnitudes of these parameters affect magnitudes of variable importance. Figure 8.24 provides the variable importance plots for boosting using two extreme values for the bagging fraction (0.1 and 0.9) and the learning rate (0.1 and 0.9) for the solubility data. The left-hand plot has both parameters set to 0.1, and the right-hand plot has both set to 0.9:

  a. Why does the model on the right focus its importance on just the first few predictors, whereas the model on the left spreads importance across more predictors?

The right-hand model concentrates its importance on just a few predictors because the large bagging fraction and learning rate mean each tree sees nearly all of the data and takes large steps toward the gradient, so the same dominant predictors are chosen repeatedly and the remaining predictors get little chance to contribute. The left-hand model, with a small bagging fraction and learning rate, builds each tree on a small subsample and updates slowly, so more predictors enter the trees and importance is spread across a larger set.
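To see the effect on data already at hand, the two extreme settings can be fit directly with gbm and their relative influences compared; a rough sketch, assuming the simulated data from Exercise 8.1 and default values for everything not shown.

# Extreme settings: bagging fraction and learning rate both 0.1 vs. both 0.9
set.seed(200)
gbm_low  <- gbm(y ~ ., data = simulated, distribution = "gaussian",
                n.trees = 500, shrinkage = 0.1, bag.fraction = 0.1,
                n.minobsinnode = 5)
set.seed(200)
gbm_high <- gbm(y ~ ., data = simulated, distribution = "gaussian",
                n.trees = 500, shrinkage = 0.9, bag.fraction = 0.9,
                n.minobsinnode = 5)
# The low setting should spread relative influence more evenly than the high one
summary(gbm_low,  plotit = FALSE)
summary(gbm_high, plotit = FALSE)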

  b. Which model do you think would be more predictive of other samples?

The left-hand model would likely be more predictive of new samples. Because it spreads importance across more predictors, it is less dependent on a handful of variables, while the right-hand model's narrow focus on a few predictors suggests it is more prone to overfitting.

  c. How would increasing interaction depth affect the slope of predictor importance for either model in Fig. 8.24?

Increasing the interaction depth makes the top predictors stand out even more, widening the gap in importance between the top predictor and the rest.
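The same kind of sketch can be used to inspect the effect of depth directly; here only interaction.depth changes between the two fits (same assumptions as the sketch above).

# Same settings except interaction depth: stumps (1) vs. deep trees (7)
set.seed(200)
gbm_shallow <- gbm(y ~ ., data = simulated, distribution = "gaussian",
                   n.trees = 500, shrinkage = 0.1, interaction.depth = 1)
set.seed(200)
gbm_deep    <- gbm(y ~ ., data = simulated, distribution = "gaussian",
                   n.trees = 500, shrinkage = 0.1, interaction.depth = 7)
# Compare how steeply relative influence drops off in each fit
summary(gbm_shallow, plotit = FALSE)
summary(gbm_deep,    plotit = FALSE)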

Exercise 8.7

Refer to Exercises 6.3 and 7.5 which describe a chemical manufacturing process. Use the same data imputation, data splitting, and pre-processing steps as before and train several tree-based models:

set.seed(200)
# Data Splitting
data("ChemicalManufacturingProcess")
trainIndex <- createDataPartition(ChemicalManufacturingProcess$Yield, p = 0.8, list = FALSE)
train_data <- ChemicalManufacturingProcess[trainIndex, ]
test_data <- ChemicalManufacturingProcess[-trainIndex, ]

# Imputation and Preprocess
preprocess_data<- preProcess(train_data, method = "knnImpute")
trainData <- predict(preprocess_data, train_data)
testData <- predict(preprocess_data, test_data)
  a. Which tree-based regression model gives the optimal resampling and test set performance?

The Cubist model performs best, with the lowest RMSE and the highest R-squared on the test set (see the comparison table below).

Random Forest Model

set.seed(200)
rain_forest <- train(Yield ~ ., data = trainData,
                 method = "rf",
                 trControl = trainControl(method = "cv", number = 5),
                 importance = TRUE)

rainforest_pred <- predict(rain_forest, newdata = testData)
rf_result <- postResample(rainforest_pred, testData$Yield)

Cubist Model

set.seed(200)
cubist_model <- train(Yield ~ ., data = trainData,
                     method = "cubist",
                     trControl = trainControl(method = "cv", number = 5))

cubist_pred <- predict(cubist_model, newdata = testData)
c_result <- postResample(cubist_pred, testData$Yield)

Gradient Boosting Machine (GBM) Model

set.seed(200)
gbm_model <- train(Yield ~ ., data = trainData,
                  method = "gbm",
                  trControl = trainControl(method = "cv", number = 5),
                  verbose = FALSE)
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
gbm_pred <- predict(gbm_model, newdata = testData)
gbm_result <- postResample(gbm_pred, testData$Yield)
# Compare performances
performance_results <- rbind(
  RF = rf_result,
  Cubist = c_result,
  GBM = gbm_result)

print(performance_results)
##             RMSE  Rsquared       MAE
## RF     0.7693580 0.5290745 0.5391135
## Cubist 0.6649941 0.6781984 0.5392419
## GBM    0.7250877 0.5792192 0.5375876
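The test-set comparison above can be complemented with the cross-validation resamples that caret already collected; a short sketch assuming the three train objects above (each tuned with the same 5-fold CV setup).

# Resampling (cross-validation) performance for the three tree-based models
resamps <- resamples(list(RF = rain_forest, Cubist = cubist_model, GBM = gbm_model))
summary(resamps)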
  b. Which predictors are most important in the optimal tree-based regression model? Do either the biological or process variables dominate the list? How do the top 10 important predictors compare to the top 10 predictors from the optimal linear and nonlinear models?

ManufacturingProcess32 is the most important predictor in both the random forest and GBM models, while ManufacturingProcess17 ranks first for Cubist. The process variables dominate the list, making up most of the top predictors in all three models, just as they did for the optimal linear and nonlinear models. The exact ordering of the top 10 differs across model types, however, since tree-based models can capture interactions and nonlinear structure that linear models cannot.

# Importance for Random Forest
rf_importance <- varImp(rain_forest, scale = FALSE)
print(rf_importance)
## rf variable importance
## 
##   only 20 most important variables shown (out of 57)
## 
##                        Overall
## ManufacturingProcess32  42.411
## ManufacturingProcess17  13.903
## BiologicalMaterial11    13.171
## ManufacturingProcess31   9.102
## BiologicalMaterial12     8.489
## ManufacturingProcess06   7.346
## ManufacturingProcess39   7.138
## ManufacturingProcess13   6.893
## ManufacturingProcess10   6.787
## BiologicalMaterial03     6.353
## BiologicalMaterial04     6.151
## BiologicalMaterial06     5.781
## ManufacturingProcess19   5.617
## BiologicalMaterial01     5.384
## ManufacturingProcess28   4.848
## ManufacturingProcess09   4.833
## ManufacturingProcess34   4.737
## BiologicalMaterial02     4.613
## ManufacturingProcess20   4.584
## ManufacturingProcess26   4.350
plot(rf_importance, top = 10)

# Importance for GBM
gbm_importance <- varImp(gbm_model, scale = FALSE)
print(gbm_importance)
## gbm variable importance
## 
##   only 20 most important variables shown (out of 57)
## 
##                        Overall
## ManufacturingProcess32 172.407
## ManufacturingProcess17  29.798
## ManufacturingProcess31  27.164
## BiologicalMaterial12    26.211
## ManufacturingProcess09  17.944
## BiologicalMaterial08    16.353
## BiologicalMaterial09    15.887
## ManufacturingProcess06  15.110
## ManufacturingProcess30  14.934
## ManufacturingProcess27  14.367
## ManufacturingProcess13  13.826
## BiologicalMaterial03    12.122
## ManufacturingProcess05  12.052
## BiologicalMaterial11    11.891
## ManufacturingProcess23  11.490
## ManufacturingProcess34  10.006
## ManufacturingProcess16   9.911
## ManufacturingProcess01   9.675
## ManufacturingProcess14   9.487
## ManufacturingProcess15   9.052
plot(gbm_importance, top = 10)

# Importance for Cubist
cubist_importance <- varImp(cubist_model, scale = FALSE)
print(cubist_importance)
## cubist variable importance
## 
##   only 20 most important variables shown (out of 57)
## 
##                        Overall
## ManufacturingProcess17    47.0
## ManufacturingProcess32    44.5
## ManufacturingProcess13    20.5
## ManufacturingProcess39    16.5
## ManufacturingProcess04    15.5
## BiologicalMaterial12      14.5
## BiologicalMaterial06      12.0
## ManufacturingProcess26    10.5
## ManufacturingProcess33    10.0
## ManufacturingProcess10    10.0
## BiologicalMaterial02       9.5
## ManufacturingProcess29     9.5
## BiologicalMaterial09       9.5
## ManufacturingProcess09     8.0
## ManufacturingProcess30     8.0
## BiologicalMaterial08       7.0
## ManufacturingProcess01     6.5
## BiologicalMaterial10       6.0
## ManufacturingProcess14     5.5
## ManufacturingProcess21     5.0
plot(cubist_importance, top = 10)
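To compare the rankings directly, the top ten predictor names from each model can be extracted and intersected; a sketch assuming the three varImp() objects computed above, with top_n_vars() as an illustrative helper.

# Illustrative helper: top-n predictor names from a caret varImp object
top_n_vars <- function(imp, n = 10) {
  scores <- imp$importance
  head(rownames(scores)[order(scores$Overall, decreasing = TRUE)], n)
}
top10 <- list(RF = top_n_vars(rf_importance),
              GBM = top_n_vars(gbm_importance),
              Cubist = top_n_vars(cubist_importance))
top10
# Predictors that appear in the top 10 of all three models
Reduce(intersect, top10)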

  c. Plot the optimal single tree with the distribution of yield in the terminal nodes. Does this view of the data provide additional knowledge about the biological or process predictors and their relationship with yield?

This view of the data makes the relationship between the predictors and yield easier to see. It shows how both biological and process variables influence yield, with ManufacturingProcess32 as the most important predictor, and the terminal nodes show how the distribution of yield changes along each path down the tree.

# Single decision tree
set.seed(200)
singleTree <- rpart(Yield ~ ., data = trainData)

treeParty <- as.party(singleTree)

# Plot the tree
plot(treeParty, main = "Tree with Yield Distribution", gp=gpar(fontsize = 6))
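The yield distribution in each terminal node can also be summarized numerically; a small sketch assuming treeParty and trainData from above (recall that Yield here is on the centered-and-scaled scale produced by preProcess).

# Sample size and mean (scaled) yield for each terminal node of the single tree
node_id <- predict(treeParty, newdata = trainData, type = "node")
data.frame(node = sort(unique(node_id)),
           n = as.vector(table(node_id)),
           mean_yield = tapply(trainData$Yield, node_id, mean))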