library(AppliedPredictiveModeling)
library(psych)
library(reshape2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
library(mice)
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(caTools)
library(elasticnet)
## Loading required package: lars
## Loaded lars 1.2
## 
## Attaching package: 'lars'
## The following object is masked from 'package:psych':
## 
##     error.bars
library(pls)
## 
## Attaching package: 'pls'
## The following object is masked from 'package:caret':
## 
##     R2
## The following object is masked from 'package:stats':
## 
##     loadings
library(mlbench)
library(earth)
## Loading required package: Formula
## Loading required package: plotmo
## Loading required package: plotrix
## 
## Attaching package: 'plotrix'
## The following object is masked from 'package:psych':
## 
##     rescale
## Loading required package: TeachingDemos
library(kernlab)
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:purrr':
## 
##     cross
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:psych':
## 
##     alpha
library(nnet)
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## 
## Attaching package: 'modeltools'
## The following object is masked from 'package:kernlab':
## 
##     prior
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## 
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
## 
##     boundary
library(gbm)
## Loaded gbm 2.1.8
library(rpart)
library(Cubist)

Exercise 8.1

Recreate the simulated data from Exercise 7.2:

library(mlbench)

# Simulate the Friedman 1 benchmark: 200 samples, noise sd = 1.
set.seed(200)
friedman <- mlbench.friedman1(n = 200, sd = 1)

# Bind the predictor matrix and the response into one data frame, then
# rename the last column (the response) to "y".
simulated <- as.data.frame(cbind(friedman$x, friedman$y))
names(simulated)[ncol(simulated)] <- "y"

(a)

Fit a random forest model to all of the predictors, then estimate the variable importance scores:

library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:psych':
## 
##     outlier
library(caret)
# Random forest on all predictors; importance = TRUE asks randomForest to
# assess predictor importance so it can be extracted below.
model1 <- randomForest(
  y ~ .,
  data = simulated,
  importance = TRUE,
  ntree = 1000
)

# Unscaled variable importance scores.
rfImp1 <- varImp(model1, scale = FALSE)

model1
## 
## Call:
##  randomForest(formula = y ~ ., data = simulated, importance = TRUE,      ntree = 1000) 
##                Type of random forest: regression
##                      Number of trees: 1000
## No. of variables tried at each split: 3
## 
##           Mean of squared residuals: 6.754258
##                     % Var explained: 72.3
rfImp1
##          Overall
## V1   8.732235404
## V2   6.415369387
## V3   0.763591825
## V4   7.615118809
## V5   2.023524577
## V6   0.165111172
## V7  -0.005961659
## V8  -0.166362581
## V9  -0.095292651
## V10 -0.074944788
varImpPlot(model1)

Did the random forest model significantly use the uninformative predictors (V6-V10)?

DISCUSSION:

The importance scores of V6-V10 are much smaller than those of the informative predictors V1-V5. The random forest model did not significantly use the uninformative predictors V6-V10.

(b)

Now add an additional predictor that is highly correlated with one of the informative predictors. For example:

# Near-copy of V1: V1 plus standard-normal noise scaled by 0.1.
simulated$duplicate1 <- with(simulated, V1 + rnorm(200) * 0.1)
with(simulated, cor(duplicate1, V1))
## [1] 0.9460206

Fit another random forest model to these data.

# Refit the random forest now that duplicate1 is among the predictors.
model2 <- randomForest(
  y ~ .,
  data = simulated,
  importance = TRUE,
  ntree = 1000
)

# Unscaled variable importance scores for the refit model.
rfImp2 <- varImp(model2, scale = FALSE)

model2
## 
## Call:
##  randomForest(formula = y ~ ., data = simulated, importance = TRUE,      ntree = 1000) 
##                Type of random forest: regression
##                      Number of trees: 1000
## No. of variables tried at each split: 3
## 
##           Mean of squared residuals: 6.922537
##                     % Var explained: 71.61
rfImp2
##                Overall
## V1          5.69119973
## V2          6.06896061
## V3          0.62970218
## V4          7.04752238
## V5          1.87238438
## V6          0.13569065
## V7         -0.01345645
## V8         -0.04370565
## V9          0.00840438
## V10         0.02894814
## duplicate1  4.28331581
varImpPlot(model2)

Did the importance score for V1 change?

DISCUSSION:

The importance score for V1 dropped from 8.7 to 5.7 when the highly correlated variable was added. The duplicate variable was itself ranked as an important variable (4.3), while the importance scores of the uninformative variables remained close to zero.

(b) cont

What happens when you add another predictor that is also highly correlated with V1?

# Second near-copy of V1: V1 plus (scaled normal noise shifted by 3).
simulated$duplicate2 <- with(simulated, V1 + (rnorm(200) * 0.1 + 3))
with(simulated, cor(duplicate2, V1))
## [1] 0.9408631
# Refit the random forest with both duplicates among the predictors.
model3 <- randomForest(
  y ~ .,
  data = simulated,
  importance = TRUE,
  ntree = 1000
)

# Unscaled variable importance scores for the refit model.
rfImp3 <- varImp(model3, scale = FALSE)

model3
## 
## Call:
##  randomForest(formula = y ~ ., data = simulated, importance = TRUE,      ntree = 1000) 
##                Type of random forest: regression
##                      Number of trees: 1000
## No. of variables tried at each split: 4
## 
##           Mean of squared residuals: 6.784205
##                     % Var explained: 72.18
rfImp3
##                Overall
## V1          4.91687329
## V2          6.52816504
## V3          0.58711552
## V4          7.04870917
## V5          2.03115561
## V6          0.14213148
## V7          0.10991985
## V8         -0.08405687
## V9         -0.01075028
## V10         0.09230576
## duplicate1  3.80068234
## duplicate2  1.87721959
varImpPlot(model3)

What happens when you add another predictor that is also highly correlated with V1?

DISCUSSION:

Because V1 was highly important, its highly correlated variable is highly important as well. Adding the second duplicate took some importance away from V1 (5.7 to 4.9) and a smaller amount of importance from duplicate1 (4.3 to 3.8).

(c)

Use the cforest function in the party package to fit a random forest model using conditional inference trees. The party package function varimp can calculate predictor importance. The conditional argument of that function toggles between the traditional importance measure and the modified version described in Strobl et al. (2007). Do these importances show the same pattern as the traditional random forest model?

# Drop the two correlated duplicates so the conditional-inference forest is
# fit on the ten original predictors only.
simulated_orig <- subset(simulated, select = -c(duplicate1, duplicate2))

modelCF <- cforest(y ~ ., data = simulated_orig)

# Compute the importance once. The earlier `CF_imp <- varimp(modelCF)` was
# overwritten immediately by this call, so it only repeated the permutation
# work. caret's varImp() wraps party's varimp() for cforest fits; use
# varimp(modelCF, conditional = TRUE) for the Strobl et al. conditional measure.
CF_imp <- varImp(modelCF)

modelCF
## 
##   Random Forest using Conditional Inference Trees
## 
## Number of trees:  500 
## 
## Response:  y 
## Inputs:  V1, V2, V3, V4, V5, V6, V7, V8, V9, V10 
## Number of observations:  200
CF_imp
##          Overall
## V1   9.088406489
## V2   6.616405032
## V3  -0.007299805
## V4   8.193748339
## V5   1.879879716
## V6  -0.058979879
## V7   0.028906702
## V8  -0.011912122
## V9  -0.046523639
## V10 -0.041972268
# Vs
rfImp1
##          Overall
## V1   8.732235404
## V2   6.415369387
## V3   0.763591825
## V4   7.615118809
## V5   2.023524577
## V6   0.165111172
## V7  -0.005961659
## V8  -0.166362581
## V9  -0.095292651
## V10 -0.074944788

DISCUSSION:

The informative variables generally remained important; however, the random forest model using conditional inference trees put more emphasis on V4 and much less on V3, whose importance dropped to about zero.

(d)

Repeat this process with different tree models, such as boosted trees and Cubist. Does the same pattern occur?

# Boosted trees (gbm) with a gaussian loss, fit on the data without the
# correlated duplicates; summary() reports each predictor's relative influence.
model_Boost <- gbm(
  formula = y ~ .,
  distribution = "gaussian",
  data = simulated_orig
)
summary(model_Boost)

##     var    rel.inf
## V4   V4 33.1328535
## V1   V1 27.7045971
## V2   V2 21.8020792
## V5   V5  9.6321219
## V3   V3  7.5195471
## V6   V6  0.2088014
## V7   V7  0.0000000
## V8   V8  0.0000000
## V9   V9  0.0000000
## V10 V10  0.0000000

DISCUSSION

While the top 5 remain important in the boosted model, Var4 is the most important.

# Build the Cubist predictor set from simulated_orig, not simulated, so the
# correlated duplicates (duplicate1, duplicate2) are excluded and the
# predictors match the data the response is taken from.
simulated_orig2 <- subset(simulated_orig, select = -y)
model_cubist <- cubist(x = simulated_orig2, y = simulated_orig$y)
model_cubist
## 
## Call:
## cubist.default(x = simulated_orig2, y = simulated_orig$y)
## 
## Number of samples: 200 
## Number of predictors: 12 
## 
## Number of committees: 1 
## Number of rules: 1
summary(model_cubist)
## 
## Call:
## cubist.default(x = simulated_orig2, y = simulated_orig$y)
## 
## 
## Cubist [Release 2.07 GPL Edition]  Sun Nov 21 16:47:24 2021
## ---------------------------------
## 
##     Target attribute `outcome'
## 
## Read 200 cases (13 attributes) from undefined.data
## 
## Model:
## 
##   Rule 1: [200 cases, mean 14.416183, range 3.55596 to 28.38167, est err 1.936506]
## 
##  outcome = 0.269253 + 8.9 V4 + 7.1 V2 + 5.1 V5 + 4.8 V1 + 3.2 duplicate1
## 
## 
## Evaluation on training data (200 cases):
## 
##     Average  |error|           2.012236
##     Relative |error|               0.50
##     Correlation coefficient        0.87
## 
## 
##  Attribute usage:
##    Conds  Model
## 
##           100%    V1
##           100%    V2
##           100%    V4
##           100%    V5
##           100%    duplicate1
## 
## 
## Time: 0.0 secs
varImp(model_cubist)
##            Overall
## V1              50
## V2              50
## V4              50
## V5              50
## duplicate1      50
## V3               0
## V6               0
## V7               0
## V8               0
## V9               0
## V10              0
## duplicate2       0

DISCUSSION:

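Cubist uses only a few predictors in its linear model: V1, V2, V4, and V5 (plus duplicate1 when the correlated duplicates are left in the predictor set, as in the output above). V3 and the noise predictors V6-V10 are not used.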

Exercise 8.2

Use simulation to show tree bias with different granularities.

library(rpart.plot)
# Simulate 100 samples with 10 linear predictors, 6 uncorrelated noise
# predictors and 4 correlated noise predictors (corrValue = 0.8).
example <- twoClassSim(
  n = 100,
  linearVars = 10,
  noiseVars = 6,
  corrVars = 4, # spelled out; `corrVar` only worked via partial matching
  corrValue = 0.8,
  mislabel = 0
)

# Fit a single tree for Linear01 on every other column and plot it.
fit <- rpart(Linear01 ~ ., data = example)
rpart.plot(fit)

### DISCUSSION: From the text: "Finally, these trees suffer from selection bias: predictors with a higher number of distinct values are favored over more granular predictors…. The danger occurs when a data set consists of a mix of informative and noise variables, and the noise variables have many more splits than the informative variables. Then there is a high probability that the noise variables will be chosen to split the top nodes of the tree. Pruning will produce either a tree with misleading structure or no tree at all."

Exercise 8.3

In stochastic gradient boosting the bagging fraction and learning rate will govern the construction of the trees as they are guided by the gradient. Although the optimal values of these parameters should be obtained through the tuning process, it is helpful to understand how the magnitudes of these parameters affect magnitudes of variable importance. Figure 8.24 provides the variable importance plots for boosting using two extreme values for the bagging fraction (0.1 and 0.9) and the learning rate (0.1 and 0.9) for the solubility data. The left-hand plot has both parameters set to 0.1, and the right-hand plot has both set to 0.9:

  1. Why does the model on the right focus its importance on just the first few predictors, whereas the model on the left spreads importance across more predictors?

DISCUSSION:

The learning rate can take values [0,1] and is a tuning parameter for the model. The gradient boosting could be subject to overfitting by selecting the optimal learner. Some suggest small values of the learning parameter work best, but this requires computing time.

The random sampling used in bagging reduces prediction variance. Friedman introduced the bagging fraction, the fraction of the training data used, as a tuning parameter for the model. This modification improved prediction while reducing the required computing resources, and Friedman suggests a bagging fraction of 0.5.

The effect of a small learning rate may tend to overfit the model.

The effect of a small bagging rate may allow other variables to be randomly selected.

The right hand plot is a depiction of a large bagging rate and large learning rate.

  1. Which model do you think would be more predictive of other samples?

DISCUSSION:

These two models are the extremes. A review of the RMSE would assist in picking a model. However, since Ridgeway suggests that small values of the learning parameter are best and Friedman suggests a bagging fraction of 0.5, the left-hand model may serve better.

  1. How would increasing interaction depth affect the slope of predictor importance for either model in Fig. 8.24?

DISCUSSION:

Interaction depth controls the depth of each tree, i.e., its number of splits (nodes). If a tree is grown too deep it may overfit and may need to be pruned. Further increasing the number of nodes adds complexity, and may bring more predictors into the model or form partitions on existing predictors that overlap.

Exercise 8.7

Refer to Exercises 6.3 and 7.5, which describe a chemical manufacturing process. Use the same data imputation, data splitting, and pre-processing steps as before and train several tree-based models.

Exercise 6.3 describes data for a chemical manufacturing process. Use the same data imputation, data splitting, and pre-processing steps as before and train several non linear regression models.

Impute, datasplit and preprocess…..

library(AppliedPredictiveModeling)
# Load the chemical manufacturing data set and check its dimensions.
data(ChemicalManufacturingProcess)
dim(ChemicalManufacturingProcess)
## [1] 176  58
str(ChemicalManufacturingProcess)
## 'data.frame':    176 obs. of  58 variables:
##  $ Yield                 : num  38 42.4 42 41.4 42.5 ...
##  $ BiologicalMaterial01  : num  6.25 8.01 8.01 8.01 7.47 6.12 7.48 6.94 6.94 6.94 ...
##  $ BiologicalMaterial02  : num  49.6 61 61 61 63.3 ...
##  $ BiologicalMaterial03  : num  57 67.5 67.5 67.5 72.2 ...
##  $ BiologicalMaterial04  : num  12.7 14.6 14.6 14.6 14 ...
##  $ BiologicalMaterial05  : num  19.5 19.4 19.4 19.4 17.9 ...
##  $ BiologicalMaterial06  : num  43.7 53.1 53.1 53.1 54.7 ...
##  $ BiologicalMaterial07  : num  100 100 100 100 100 100 100 100 100 100 ...
##  $ BiologicalMaterial08  : num  16.7 19 19 19 18.2 ...
##  $ BiologicalMaterial09  : num  11.4 12.6 12.6 12.6 12.8 ...
##  $ BiologicalMaterial10  : num  3.46 3.46 3.46 3.46 3.05 3.78 3.04 3.85 3.85 3.85 ...
##  $ BiologicalMaterial11  : num  138 154 154 154 148 ...
##  $ BiologicalMaterial12  : num  18.8 21.1 21.1 21.1 21.1 ...
##  $ ManufacturingProcess01: num  NA 0 0 0 10.7 12 11.5 12 12 12 ...
##  $ ManufacturingProcess02: num  NA 0 0 0 0 0 0 0 0 0 ...
##  $ ManufacturingProcess03: num  NA NA NA NA NA NA 1.56 1.55 1.56 1.55 ...
##  $ ManufacturingProcess04: num  NA 917 912 911 918 924 933 929 928 938 ...
##  $ ManufacturingProcess05: num  NA 1032 1004 1015 1028 ...
##  $ ManufacturingProcess06: num  NA 210 207 213 206 ...
##  $ ManufacturingProcess07: num  NA 177 178 177 178 178 177 178 177 177 ...
##  $ ManufacturingProcess08: num  NA 178 178 177 178 178 178 178 177 177 ...
##  $ ManufacturingProcess09: num  43 46.6 45.1 44.9 45 ...
##  $ ManufacturingProcess10: num  NA NA NA NA NA NA 11.6 10.2 9.7 10.1 ...
##  $ ManufacturingProcess11: num  NA NA NA NA NA NA 11.5 11.3 11.1 10.2 ...
##  $ ManufacturingProcess12: num  NA 0 0 0 0 0 0 0 0 0 ...
##  $ ManufacturingProcess13: num  35.5 34 34.8 34.8 34.6 34 32.4 33.6 33.9 34.3 ...
##  $ ManufacturingProcess14: num  4898 4869 4878 4897 4992 ...
##  $ ManufacturingProcess15: num  6108 6095 6087 6102 6233 ...
##  $ ManufacturingProcess16: num  4682 4617 4617 4635 4733 ...
##  $ ManufacturingProcess17: num  35.5 34 34.8 34.8 33.9 33.4 33.8 33.6 33.9 35.3 ...
##  $ ManufacturingProcess18: num  4865 4867 4877 4872 4886 ...
##  $ ManufacturingProcess19: num  6049 6097 6078 6073 6102 ...
##  $ ManufacturingProcess20: num  4665 4621 4621 4611 4659 ...
##  $ ManufacturingProcess21: num  0 0 0 0 -0.7 -0.6 1.4 0 0 1 ...
##  $ ManufacturingProcess22: num  NA 3 4 5 8 9 1 2 3 4 ...
##  $ ManufacturingProcess23: num  NA 0 1 2 4 1 1 2 3 1 ...
##  $ ManufacturingProcess24: num  NA 3 4 5 18 1 1 2 3 4 ...
##  $ ManufacturingProcess25: num  4873 4869 4897 4892 4930 ...
##  $ ManufacturingProcess26: num  6074 6107 6116 6111 6151 ...
##  $ ManufacturingProcess27: num  4685 4630 4637 4630 4684 ...
##  $ ManufacturingProcess28: num  10.7 11.2 11.1 11.1 11.3 11.4 11.2 11.1 11.3 11.4 ...
##  $ ManufacturingProcess29: num  21 21.4 21.3 21.3 21.6 21.7 21.2 21.2 21.5 21.7 ...
##  $ ManufacturingProcess30: num  9.9 9.9 9.4 9.4 9 10.1 11.2 10.9 10.5 9.8 ...
##  $ ManufacturingProcess31: num  69.1 68.7 69.3 69.3 69.4 68.2 67.6 67.9 68 68.5 ...
##  $ ManufacturingProcess32: num  156 169 173 171 171 173 159 161 160 164 ...
##  $ ManufacturingProcess33: num  66 66 66 68 70 70 65 65 65 66 ...
##  $ ManufacturingProcess34: num  2.4 2.6 2.6 2.5 2.5 2.5 2.5 2.5 2.5 2.5 ...
##  $ ManufacturingProcess35: num  486 508 509 496 468 490 475 478 491 488 ...
##  $ ManufacturingProcess36: num  0.019 0.019 0.018 0.018 0.017 0.018 0.019 0.019 0.019 0.019 ...
##  $ ManufacturingProcess37: num  0.5 2 0.7 1.2 0.2 0.4 0.8 1 1.2 1.8 ...
##  $ ManufacturingProcess38: num  3 2 2 2 2 2 2 2 3 3 ...
##  $ ManufacturingProcess39: num  7.2 7.2 7.2 7.2 7.3 7.2 7.3 7.3 7.4 7.1 ...
##  $ ManufacturingProcess40: num  NA 0.1 0 0 0 0 0 0 0 0 ...
##  $ ManufacturingProcess41: num  NA 0.15 0 0 0 0 0 0 0 0 ...
##  $ ManufacturingProcess42: num  11.6 11.1 12 10.6 11 11.5 11.7 11.4 11.4 11.3 ...
##  $ ManufacturingProcess43: num  3 0.9 1 1.1 1.1 2.2 0.7 0.8 0.9 0.8 ...
##  $ ManufacturingProcess44: num  1.8 1.9 1.8 1.8 1.7 1.8 2 2 1.9 1.9 ...
##  $ ManufacturingProcess45: num  2.4 2.2 2.3 2.1 2.1 2 2.2 2.2 2.1 2.4 ...
md.pattern(ChemicalManufacturingProcess)

##     Yield BiologicalMaterial01 BiologicalMaterial02 BiologicalMaterial03
## 152     1                    1                    1                    1
## 6       1                    1                    1                    1
## 1       1                    1                    1                    1
## 7       1                    1                    1                    1
## 5       1                    1                    1                    1
## 2       1                    1                    1                    1
## 1       1                    1                    1                    1
## 1       1                    1                    1                    1
## 1       1                    1                    1                    1
##         0                    0                    0                    0
##     BiologicalMaterial04 BiologicalMaterial05 BiologicalMaterial06
## 152                    1                    1                    1
## 6                      1                    1                    1
## 1                      1                    1                    1
## 7                      1                    1                    1
## 5                      1                    1                    1
## 2                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
##                        0                    0                    0
##     BiologicalMaterial07 BiologicalMaterial08 BiologicalMaterial09
## 152                    1                    1                    1
## 6                      1                    1                    1
## 1                      1                    1                    1
## 7                      1                    1                    1
## 5                      1                    1                    1
## 2                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
##                        0                    0                    0
##     BiologicalMaterial10 BiologicalMaterial11 BiologicalMaterial12
## 152                    1                    1                    1
## 6                      1                    1                    1
## 1                      1                    1                    1
## 7                      1                    1                    1
## 5                      1                    1                    1
## 2                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
##                        0                    0                    0
##     ManufacturingProcess09 ManufacturingProcess13 ManufacturingProcess15
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess16 ManufacturingProcess17 ManufacturingProcess18
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess19 ManufacturingProcess20 ManufacturingProcess21
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess32 ManufacturingProcess37 ManufacturingProcess38
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess39 ManufacturingProcess42 ManufacturingProcess43
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess44 ManufacturingProcess45 ManufacturingProcess01
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      0
##                          0                      0                      1
##     ManufacturingProcess04 ManufacturingProcess05 ManufacturingProcess07
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        0                      0                      0
##                          1                      1                      1
##     ManufacturingProcess08 ManufacturingProcess12 ManufacturingProcess14
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      0
## 1                        0                      0                      1
##                          1                      1                      1
##     ManufacturingProcess22 ManufacturingProcess23 ManufacturingProcess24
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        0                      0                      0
##                          1                      1                      1
##     ManufacturingProcess40 ManufacturingProcess41 ManufacturingProcess06
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      0
## 1                        1                      1                      1
## 1                        0                      0                      0
##                          1                      1                      2
##     ManufacturingProcess02 ManufacturingProcess25 ManufacturingProcess26
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      0                      0
## 2                        0                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        0                      1                      1
##                          3                      5                      5
##     ManufacturingProcess27 ManufacturingProcess28 ManufacturingProcess29
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        0                      0                      0
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          5                      5                      5
##     ManufacturingProcess30 ManufacturingProcess31 ManufacturingProcess33
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        0                      0                      0
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          5                      5                      5
##     ManufacturingProcess34 ManufacturingProcess35 ManufacturingProcess36
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        0                      0                      0
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          5                      5                      5
##     ManufacturingProcess10 ManufacturingProcess11 ManufacturingProcess03    
## 152                      1                      1                      1   0
## 6                        1                      1                      0   1
## 1                        1                      0                      1   1
## 7                        0                      0                      0   3
## 5                        1                      1                      1  11
## 2                        1                      1                      1   1
## 1                        1                      1                      1   1
## 1                        0                      0                      0   4
## 1                        0                      0                      0  16
##                          9                     10                     15 106
# Count the missing values in every column of the data set.
countNA <- vapply(
  ChemicalManufacturingProcess,
  function(column) sum(is.na(column)),
  numeric(1)
)

# Show the per-column NA counts.
countNA
##                  Yield   BiologicalMaterial01   BiologicalMaterial02 
##                      0                      0                      0 
##   BiologicalMaterial03   BiologicalMaterial04   BiologicalMaterial05 
##                      0                      0                      0 
##   BiologicalMaterial06   BiologicalMaterial07   BiologicalMaterial08 
##                      0                      0                      0 
##   BiologicalMaterial09   BiologicalMaterial10   BiologicalMaterial11 
##                      0                      0                      0 
##   BiologicalMaterial12 ManufacturingProcess01 ManufacturingProcess02 
##                      0                      1                      3 
## ManufacturingProcess03 ManufacturingProcess04 ManufacturingProcess05 
##                     15                      1                      1 
## ManufacturingProcess06 ManufacturingProcess07 ManufacturingProcess08 
##                      2                      1                      1 
## ManufacturingProcess09 ManufacturingProcess10 ManufacturingProcess11 
##                      0                      9                     10 
## ManufacturingProcess12 ManufacturingProcess13 ManufacturingProcess14 
##                      1                      0                      1 
## ManufacturingProcess15 ManufacturingProcess16 ManufacturingProcess17 
##                      0                      0                      0 
## ManufacturingProcess18 ManufacturingProcess19 ManufacturingProcess20 
##                      0                      0                      0 
## ManufacturingProcess21 ManufacturingProcess22 ManufacturingProcess23 
##                      0                      1                      1 
## ManufacturingProcess24 ManufacturingProcess25 ManufacturingProcess26 
##                      1                      5                      5 
## ManufacturingProcess27 ManufacturingProcess28 ManufacturingProcess29 
##                      5                      5                      5 
## ManufacturingProcess30 ManufacturingProcess31 ManufacturingProcess32 
##                      5                      5                      0 
## ManufacturingProcess33 ManufacturingProcess34 ManufacturingProcess35 
##                      5                      5                      5 
## ManufacturingProcess36 ManufacturingProcess37 ManufacturingProcess38 
##                      5                      0                      0 
## ManufacturingProcess39 ManufacturingProcess40 ManufacturingProcess41 
##                      0                      1                      1 
## ManufacturingProcess42 ManufacturingProcess43 ManufacturingProcess44 
##                      0                      0                      0 
## ManufacturingProcess45 
##                      0
# Predictors only: drop the first column (Yield, the response).
pred <- ChemicalManufacturingProcess[, -1]

# Impute missing predictor values with k-nearest neighbours.
# NOTE(review): the imputation model is estimated on all rows, before the
# train/test split, so held-out rows influence it -- confirm this is intended.
Imp_pred <- preProcess(x = pred, method = "knnImpute")

# Apply the fitted imputation to the predictors.
pred1 <- predict(object = Imp_pred, newdata = pred)

# Center and scale the imputed predictors.
# NOTE(review): caret's preProcess documentation suggests knnImpute already
# centers and scales the data -- check whether this second pass is needed.
pred2 <- preProcess(x = pred1, method = c("center", "scale"))
pred3 <- predict(object = pred2, newdata = pred1)

Split the data into a training set (75%) and a test set (25%).

# Partition the samples on Yield: 75% training, 25% test.
# The seed is fixed so the partition is reproducible.
set.seed(200)
trainingRows <- createDataPartition(
  y = ChemicalManufacturingProcess$Yield,
  p = 0.75,
  list = FALSE
)

# Pre-processed predictors for each partition.
train_X2 <- pred3[trainingRows, ]
test_X2 <- pred3[-trainingRows, ]

# Matching response values, taken from the untransformed Yield column.
train_Y2 <- ChemicalManufacturingProcess$Yield[trainingRows]
test_Y2 <- ChemicalManufacturingProcess$Yield[-trainingRows]

(a)

Which tree-based regression model gives the optimal resampling and test set performance?

# Random forest ----
# Tune mtry over 10 candidate values using train()'s default resampling.
set.seed(200)

rf_Model <- train(
  x = train_X2,
  y = train_Y2,
  method = "rf",
  tuneLength = 10
)

# Show the resampling results for each candidate mtry.
rf_Model
## Random Forest 
## 
## 132 samples
##  57 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 132, 132, 132, 132, 132, 132, ... 
## Resampling results across tuning parameters:
## 
##   mtry  RMSE      Rsquared   MAE      
##    2    1.348893  0.5293886  1.0696605
##    8    1.274199  0.5575346  0.9932945
##   14    1.251322  0.5630105  0.9659533
##   20    1.240094  0.5666342  0.9528162
##   26    1.235294  0.5657072  0.9453581
##   32    1.230988  0.5652645  0.9410238
##   38    1.232093  0.5626957  0.9411426
##   44    1.229151  0.5613501  0.9394080
##   50    1.235517  0.5549264  0.9452981
##   57    1.235908  0.5540221  0.9456433
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 44.
# Variable importance of the tuned random forest, left on its raw
# (unscaled) scale.
rf_ModelImp <- varImp(rf_Model, scale = FALSE)
print(rf_ModelImp)
## rf variable importance
## 
##   only 20 most important variables shown (out of 57)
## 
##                        Overall
## ManufacturingProcess32 160.094
## ManufacturingProcess17  30.294
## BiologicalMaterial12    22.691
## ManufacturingProcess13  14.908
## ManufacturingProcess31  14.468
## BiologicalMaterial03    13.588
## ManufacturingProcess06  13.507
## ManufacturingProcess09  12.020
## BiologicalMaterial11    11.958
## BiologicalMaterial06     9.814
## BiologicalMaterial02     6.027
## BiologicalMaterial04     5.919
## ManufacturingProcess15   5.850
## ManufacturingProcess11   5.820
## ManufacturingProcess21   5.464
## BiologicalMaterial05     5.275
## ManufacturingProcess28   5.010
## ManufacturingProcess36   4.577
## ManufacturingProcess24   4.487
## ManufacturingProcess39   4.272
# Predict Yield for the held-out samples with the tuned random forest.
rf_Pred <- predict(
  rf_Model,
  newdata = test_X2
)

# Test-set RMSE, R-squared and MAE.
postResample(
  pred = rf_Pred,
  obs = test_Y2
)
##      RMSE  Rsquared       MAE 
## 1.1304686 0.6491234 0.8625628

DISCUSSION:

Random forest: the optimal model used mtry = 44, with a resampled (bootstrap) RMSE of 1.229151.

Test set performance:

RMSE = 1.1304686, Rsquared = 0.6491234, MAE = 0.8625628

# Gradient boosting ----
set.seed(200)

# Candidate tuning values: tree depth 1/3/5/7, 100 to 1000 trees in steps
# of 50, two learning rates and two minimum terminal-node sizes.
gbmGrid <- expand.grid(
  interaction.depth = c(1, 3, 5, 7),
  n.trees = seq(from = 100, to = 1000, by = 50),
  shrinkage = c(0.01, 0.1),
  n.minobsinnode = c(5, 10)
)

# Fit gbm over the whole grid; verbose = FALSE is passed through to gbm.
# NOTE(review): this call repeatedly warns that BiologicalMaterial07 has no
# variation -- consider whether that predictor should be filtered out.
gb_Model <- train(
  x = train_X2,
  y = train_Y2,
  method = "gbm",
  tuneGrid = gbmGrid,
  verbose = FALSE
)
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
# Print the fitted gbm train object (resampling results per tuning combination).
gb_Model
## Stochastic Gradient Boosting 
## 
## 132 samples
##  57 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 132, 132, 132, 132, 132, 132, ... 
## Resampling results across tuning parameters:
## 
##   shrinkage  interaction.depth  n.minobsinnode  n.trees  RMSE      Rsquared 
##   0.01       1                   5               100     1.499632  0.4947512
##   0.01       1                   5               150     1.417461  0.5105176
##   0.01       1                   5               200     1.360321  0.5260203
##   0.01       1                   5               250     1.323412  0.5322827
##   0.01       1                   5               300     1.296580  0.5385795
##   0.01       1                   5               350     1.279495  0.5414004
##   0.01       1                   5               400     1.268340  0.5433324
##   0.01       1                   5               450     1.262272  0.5431086
##   0.01       1                   5               500     1.258412  0.5429886
##   0.01       1                   5               550     1.254102  0.5439175
##   0.01       1                   5               600     1.251736  0.5438272
##   0.01       1                   5               650     1.250824  0.5436833
##   0.01       1                   5               700     1.250649  0.5424084
##   0.01       1                   5               750     1.250381  0.5417903
##   0.01       1                   5               800     1.249261  0.5422278
##   0.01       1                   5               850     1.249046  0.5421169
##   0.01       1                   5               900     1.249040  0.5420316
##   0.01       1                   5               950     1.247811  0.5427731
##   0.01       1                   5              1000     1.247777  0.5429090
##   0.01       1                  10               100     1.501809  0.4864556
##   0.01       1                  10               150     1.416911  0.5043683
##   0.01       1                  10               200     1.360048  0.5164907
##   0.01       1                  10               250     1.324937  0.5227843
##   0.01       1                  10               300     1.301207  0.5278352
##   0.01       1                  10               350     1.286024  0.5301827
##   0.01       1                  10               400     1.276005  0.5325708
##   0.01       1                  10               450     1.268655  0.5344481
##   0.01       1                  10               500     1.263677  0.5362141
##   0.01       1                  10               550     1.260768  0.5365805
##   0.01       1                  10               600     1.258678  0.5372867
##   0.01       1                  10               650     1.256590  0.5382483
##   0.01       1                  10               700     1.255561  0.5384441
##   0.01       1                  10               750     1.254105  0.5387985
##   0.01       1                  10               800     1.253554  0.5391463
##   0.01       1                  10               850     1.252346  0.5398259
##   0.01       1                  10               900     1.252651  0.5397289
##   0.01       1                  10               950     1.252674  0.5398174
##   0.01       1                  10              1000     1.253202  0.5395628
##   0.01       3                   5               100     1.403157  0.5216697
##   0.01       3                   5               150     1.327696  0.5316870
##   0.01       3                   5               200     1.289239  0.5378374
##   0.01       3                   5               250     1.265997  0.5433461
##   0.01       3                   5               300     1.251552  0.5478952
##   0.01       3                   5               350     1.243133  0.5506859
##   0.01       3                   5               400     1.237429  0.5530759
##   0.01       3                   5               450     1.234209  0.5544853
##   0.01       3                   5               500     1.230168  0.5565592
##   0.01       3                   5               550     1.227400  0.5583432
##   0.01       3                   5               600     1.226493  0.5588772
##   0.01       3                   5               650     1.225415  0.5595342
##   0.01       3                   5               700     1.223757  0.5605969
##   0.01       3                   5               750     1.222957  0.5611199
##   0.01       3                   5               800     1.221843  0.5617946
##   0.01       3                   5               850     1.220169  0.5630096
##   0.01       3                   5               900     1.219520  0.5635094
##   0.01       3                   5               950     1.218799  0.5639823
##   0.01       3                   5              1000     1.218722  0.5640007
##   0.01       3                  10               100     1.398422  0.5211494
##   0.01       3                  10               150     1.326115  0.5291885
##   0.01       3                  10               200     1.289345  0.5354014
##   0.01       3                  10               250     1.268787  0.5387175
##   0.01       3                  10               300     1.257548  0.5411297
##   0.01       3                  10               350     1.251224  0.5427314
##   0.01       3                  10               400     1.246711  0.5443658
##   0.01       3                  10               450     1.244690  0.5453073
##   0.01       3                  10               500     1.242739  0.5459452
##   0.01       3                  10               550     1.241280  0.5470684
##   0.01       3                  10               600     1.240714  0.5471145
##   0.01       3                  10               650     1.240585  0.5470726
##   0.01       3                  10               700     1.240211  0.5473304
##   0.01       3                  10               750     1.239491  0.5477385
##   0.01       3                  10               800     1.239723  0.5474466
##   0.01       3                  10               850     1.240295  0.5471081
##   0.01       3                  10               900     1.240570  0.5469857
##   0.01       3                  10               950     1.240199  0.5471966
##   0.01       3                  10              1000     1.239808  0.5476067
##   0.01       5                   5               100     1.377279  0.5325597
##   0.01       5                   5               150     1.303276  0.5439147
##   0.01       5                   5               200     1.268279  0.5489251
##   0.01       5                   5               250     1.249024  0.5527714
##   0.01       5                   5               300     1.238296  0.5555155
##   0.01       5                   5               350     1.230647  0.5582922
##   0.01       5                   5               400     1.225794  0.5606792
##   0.01       5                   5               450     1.221577  0.5630869
##   0.01       5                   5               500     1.218498  0.5647443
##   0.01       5                   5               550     1.216291  0.5660116
##   0.01       5                   5               600     1.214989  0.5669343
##   0.01       5                   5               650     1.212653  0.5683785
##   0.01       5                   5               700     1.211540  0.5690693
##   0.01       5                   5               750     1.210538  0.5697310
##   0.01       5                   5               800     1.209784  0.5703990
##   0.01       5                   5               850     1.208856  0.5710570
##   0.01       5                   5               900     1.207994  0.5716023
##   0.01       5                   5               950     1.207048  0.5721661
##   0.01       5                   5              1000     1.206383  0.5726665
##   0.01       5                  10               100     1.386371  0.5280223
##   0.01       5                  10               150     1.317433  0.5341875
##   0.01       5                  10               200     1.281341  0.5397960
##   0.01       5                  10               250     1.261588  0.5438357
##   0.01       5                  10               300     1.252411  0.5447899
##   0.01       5                  10               350     1.247644  0.5455782
##   0.01       5                  10               400     1.243160  0.5477063
##   0.01       5                  10               450     1.240467  0.5486866
##   0.01       5                  10               500     1.237982  0.5496623
##   0.01       5                  10               550     1.236580  0.5502976
##   0.01       5                  10               600     1.236836  0.5499420
##   0.01       5                  10               650     1.236150  0.5502862
##   0.01       5                  10               700     1.236359  0.5500084
##   0.01       5                  10               750     1.235464  0.5503440
##   0.01       5                  10               800     1.234867  0.5506802
##   0.01       5                  10               850     1.234515  0.5510357
##   0.01       5                  10               900     1.235091  0.5507661
##   0.01       5                  10               950     1.234740  0.5510170
##   0.01       5                  10              1000     1.234985  0.5509279
##   0.01       7                   5               100     1.369084  0.5391824
##   0.01       7                   5               150     1.298645  0.5463421
##   0.01       7                   5               200     1.260908  0.5533550
##   0.01       7                   5               250     1.242951  0.5564316
##   0.01       7                   5               300     1.230060  0.5616252
##   0.01       7                   5               350     1.222710  0.5644305
##   0.01       7                   5               400     1.218812  0.5662773
##   0.01       7                   5               450     1.214538  0.5684453
##   0.01       7                   5               500     1.211571  0.5701823
##   0.01       7                   5               550     1.209251  0.5715768
##   0.01       7                   5               600     1.206425  0.5732941
##   0.01       7                   5               650     1.204975  0.5740483
##   0.01       7                   5               700     1.203340  0.5749015
##   0.01       7                   5               750     1.201958  0.5758507
##   0.01       7                   5               800     1.200951  0.5765829
##   0.01       7                   5               850     1.200257  0.5770871
##   0.01       7                   5               900     1.199154  0.5778294
##   0.01       7                   5               950     1.198399  0.5783426
##   0.01       7                   5              1000     1.197772  0.5786886
##   0.01       7                  10               100     1.390716  0.5224340
##   0.01       7                  10               150     1.319261  0.5322520
##   0.01       7                  10               200     1.284965  0.5358154
##   0.01       7                  10               250     1.265928  0.5389826
##   0.01       7                  10               300     1.254788  0.5420886
##   0.01       7                  10               350     1.248902  0.5435873
##   0.01       7                  10               400     1.244693  0.5449893
##   0.01       7                  10               450     1.244009  0.5442071
##   0.01       7                  10               500     1.242475  0.5451513
##   0.01       7                  10               550     1.242331  0.5449894
##   0.01       7                  10               600     1.242656  0.5445368
##   0.01       7                  10               650     1.240951  0.5456534
##   0.01       7                  10               700     1.240243  0.5461927
##   0.01       7                  10               750     1.240354  0.5459520
##   0.01       7                  10               800     1.240710  0.5458000
##   0.01       7                  10               850     1.240776  0.5457145
##   0.01       7                  10               900     1.241014  0.5454642
##   0.01       7                  10               950     1.241368  0.5452337
##   0.01       7                  10              1000     1.240766  0.5456602
##   0.10       1                   5               100     1.265785  0.5286735
##   0.10       1                   5               150     1.264905  0.5333988
##   0.10       1                   5               200     1.262661  0.5358153
##   0.10       1                   5               250     1.264866  0.5353327
##   0.10       1                   5               300     1.267426  0.5338333
##   0.10       1                   5               350     1.272374  0.5308585
##   0.10       1                   5               400     1.275443  0.5293441
##   0.10       1                   5               450     1.279062  0.5273558
##   0.10       1                   5               500     1.282426  0.5258003
##   0.10       1                   5               550     1.283628  0.5257270
##   0.10       1                   5               600     1.284446  0.5255097
##   0.10       1                   5               650     1.285621  0.5251681
##   0.10       1                   5               700     1.286501  0.5246601
##   0.10       1                   5               750     1.289708  0.5226108
##   0.10       1                   5               800     1.290668  0.5223274
##   0.10       1                   5               850     1.292281  0.5213446
##   0.10       1                   5               900     1.294383  0.5201654
##   0.10       1                   5               950     1.295763  0.5194885
##   0.10       1                   5              1000     1.295946  0.5193919
##   0.10       1                  10               100     1.272727  0.5244975
##   0.10       1                  10               150     1.273977  0.5254529
##   0.10       1                  10               200     1.279504  0.5255056
##   0.10       1                  10               250     1.288944  0.5210394
##   0.10       1                  10               300     1.296661  0.5185810
##   0.10       1                  10               350     1.298479  0.5177224
##   0.10       1                  10               400     1.305641  0.5149803
##   0.10       1                  10               450     1.310729  0.5129133
##   0.10       1                  10               500     1.314172  0.5118746
##   0.10       1                  10               550     1.318305  0.5101500
##   0.10       1                  10               600     1.320167  0.5102354
##   0.10       1                  10               650     1.322979  0.5091117
##   0.10       1                  10               700     1.325478  0.5083940
##   0.10       1                  10               750     1.327746  0.5073821
##   0.10       1                  10               800     1.329752  0.5067096
##   0.10       1                  10               850     1.330285  0.5067085
##   0.10       1                  10               900     1.331095  0.5064901
##   0.10       1                  10               950     1.332304  0.5060501
##   0.10       1                  10              1000     1.333372  0.5060864
##   0.10       3                   5               100     1.272759  0.5265740
##   0.10       3                   5               150     1.271500  0.5289225
##   0.10       3                   5               200     1.269408  0.5310128
##   0.10       3                   5               250     1.268993  0.5313417
##   0.10       3                   5               300     1.269396  0.5314449
##   0.10       3                   5               350     1.268542  0.5320807
##   0.10       3                   5               400     1.267986  0.5325204
##   0.10       3                   5               450     1.267900  0.5325379
##   0.10       3                   5               500     1.267758  0.5326887
##   0.10       3                   5               550     1.267755  0.5326764
##   0.10       3                   5               600     1.267682  0.5327358
##   0.10       3                   5               650     1.267603  0.5327943
##   0.10       3                   5               700     1.267561  0.5328208
##   0.10       3                   5               750     1.267524  0.5328497
##   0.10       3                   5               800     1.267525  0.5328499
##   0.10       3                   5               850     1.267522  0.5328521
##   0.10       3                   5               900     1.267520  0.5328534
##   0.10       3                   5               950     1.267517  0.5328564
##   0.10       3                   5              1000     1.267518  0.5328562
##   0.10       3                  10               100     1.266026  0.5352758
##   0.10       3                  10               150     1.271463  0.5328325
##   0.10       3                  10               200     1.275464  0.5315194
##   0.10       3                  10               250     1.278644  0.5298383
##   0.10       3                  10               300     1.280758  0.5291379
##   0.10       3                  10               350     1.281332  0.5291723
##   0.10       3                  10               400     1.282097  0.5290674
##   0.10       3                  10               450     1.282747  0.5288042
##   0.10       3                  10               500     1.283388  0.5286558
##   0.10       3                  10               550     1.283546  0.5286606
##   0.10       3                  10               600     1.283919  0.5285656
##   0.10       3                  10               650     1.284115  0.5286389
##   0.10       3                  10               700     1.284562  0.5284787
##   0.10       3                  10               750     1.284766  0.5284683
##   0.10       3                  10               800     1.285252  0.5282684
##   0.10       3                  10               850     1.285444  0.5282580
##   0.10       3                  10               900     1.285549  0.5282573
##   0.10       3                  10               950     1.285646  0.5282253
##   0.10       3                  10              1000     1.285779  0.5281892
##   0.10       5                   5               100     1.256488  0.5367741
##   0.10       5                   5               150     1.253949  0.5393069
##   0.10       5                   5               200     1.252471  0.5408591
##   0.10       5                   5               250     1.250927  0.5423133
##   0.10       5                   5               300     1.250158  0.5430085
##   0.10       5                   5               350     1.249968  0.5433103
##   0.10       5                   5               400     1.249638  0.5435811
##   0.10       5                   5               450     1.249477  0.5437474
##   0.10       5                   5               500     1.249388  0.5438610
##   0.10       5                   5               550     1.249371  0.5439014
##   0.10       5                   5               600     1.249323  0.5439347
##   0.10       5                   5               650     1.249308  0.5439522
##   0.10       5                   5               700     1.249318  0.5439548
##   0.10       5                   5               750     1.249310  0.5439692
##   0.10       5                   5               800     1.249301  0.5439790
##   0.10       5                   5               850     1.249303  0.5439792
##   0.10       5                   5               900     1.249295  0.5439861
##   0.10       5                   5               950     1.249293  0.5439885
##   0.10       5                   5              1000     1.249296  0.5439865
##   0.10       5                  10               100     1.262315  0.5266417
##   0.10       5                  10               150     1.264619  0.5272806
##   0.10       5                  10               200     1.264685  0.5289003
##   0.10       5                  10               250     1.267190  0.5278543
##   0.10       5                  10               300     1.267591  0.5279323
##   0.10       5                  10               350     1.268888  0.5272508
##   0.10       5                  10               400     1.269767  0.5269480
##   0.10       5                  10               450     1.270295  0.5267430
##   0.10       5                  10               500     1.271157  0.5265118
##   0.10       5                  10               550     1.271586  0.5263102
##   0.10       5                  10               600     1.272347  0.5259151
##   0.10       5                  10               650     1.273000  0.5256591
##   0.10       5                  10               700     1.273784  0.5252669
##   0.10       5                  10               750     1.274088  0.5251450
##   0.10       5                  10               800     1.274596  0.5249240
##   0.10       5                  10               850     1.274818  0.5248358
##   0.10       5                  10               900     1.275099  0.5247348
##   0.10       5                  10               950     1.275380  0.5246277
##   0.10       5                  10              1000     1.275639  0.5245199
##   0.10       7                   5               100     1.231998  0.5493671
##   0.10       7                   5               150     1.228463  0.5522580
##   0.10       7                   5               200     1.227557  0.5533134
##   0.10       7                   5               250     1.227144  0.5537407
##   0.10       7                   5               300     1.226696  0.5541309
##   0.10       7                   5               350     1.226404  0.5545032
##   0.10       7                   5               400     1.226553  0.5544547
##   0.10       7                   5               450     1.226693  0.5544716
##   0.10       7                   5               500     1.226693  0.5545135
##   0.10       7                   5               550     1.226736  0.5545266
##   0.10       7                   5               600     1.226767  0.5545267
##   0.10       7                   5               650     1.226792  0.5545323
##   0.10       7                   5               700     1.226799  0.5545285
##   0.10       7                   5               750     1.226804  0.5545257
##   0.10       7                   5               800     1.226804  0.5545322
##   0.10       7                   5               850     1.226811  0.5545272
##   0.10       7                   5               900     1.226819  0.5545221
##   0.10       7                   5               950     1.226818  0.5545242
##   0.10       7                   5              1000     1.226821  0.5545232
##   0.10       7                  10               100     1.272979  0.5252150
##   0.10       7                  10               150     1.275732  0.5232129
##   0.10       7                  10               200     1.275471  0.5243331
##   0.10       7                  10               250     1.275679  0.5248403
##   0.10       7                  10               300     1.275894  0.5249240
##   0.10       7                  10               350     1.276357  0.5248143
##   0.10       7                  10               400     1.276732  0.5250323
##   0.10       7                  10               450     1.276978  0.5251458
##   0.10       7                  10               500     1.277021  0.5251952
##   0.10       7                  10               550     1.277488  0.5250215
##   0.10       7                  10               600     1.278145  0.5246933
##   0.10       7                  10               650     1.278404  0.5245881
##   0.10       7                  10               700     1.278435  0.5246454
##   0.10       7                  10               750     1.278813  0.5245265
##   0.10       7                  10               800     1.279001  0.5244094
##   0.10       7                  10               850     1.279166  0.5243588
##   0.10       7                  10               900     1.279443  0.5242565
##   0.10       7                  10               950     1.279416  0.5242819
##   0.10       7                  10              1000     1.279562  0.5242121
##   MAE      
##   1.2051136
##   1.1318774
##   1.0785588
##   1.0409058
##   1.0124928
##   0.9934169
##   0.9823291
##   0.9754767
##   0.9706661
##   0.9655137
##   0.9617848
##   0.9590147
##   0.9577952
##   0.9569008
##   0.9547784
##   0.9536253
##   0.9523793
##   0.9507849
##   0.9498266
##   1.2075958
##   1.1289186
##   1.0729049
##   1.0361063
##   1.0096329
##   0.9928228
##   0.9810674
##   0.9729514
##   0.9668001
##   0.9636250
##   0.9609879
##   0.9578927
##   0.9564796
##   0.9546976
##   0.9537115
##   0.9519070
##   0.9517902
##   0.9518266
##   0.9512637
##   1.1186603
##   1.0432764
##   1.0039773
##   0.9801778
##   0.9649798
##   0.9550787
##   0.9483765
##   0.9432284
##   0.9382808
##   0.9335654
##   0.9321010
##   0.9302355
##   0.9275181
##   0.9263163
##   0.9242567
##   0.9231492
##   0.9223860
##   0.9218004
##   0.9216254
##   1.1139905
##   1.0398546
##   0.9987168
##   0.9765132
##   0.9643419
##   0.9564416
##   0.9519411
##   0.9491393
##   0.9463533
##   0.9448007
##   0.9433792
##   0.9424675
##   0.9419181
##   0.9409738
##   0.9412925
##   0.9417486
##   0.9417425
##   0.9417804
##   0.9417040
##   1.0932106
##   1.0202840
##   0.9823847
##   0.9607460
##   0.9482726
##   0.9377023
##   0.9304586
##   0.9250899
##   0.9209949
##   0.9178884
##   0.9161386
##   0.9139342
##   0.9124960
##   0.9113960
##   0.9105176
##   0.9096916
##   0.9087797
##   0.9080170
##   0.9074920
##   1.0993289
##   1.0303457
##   0.9913724
##   0.9695580
##   0.9578954
##   0.9513281
##   0.9465517
##   0.9436210
##   0.9406683
##   0.9390522
##   0.9388004
##   0.9376664
##   0.9372237
##   0.9367517
##   0.9363602
##   0.9359764
##   0.9359021
##   0.9354277
##   0.9355993
##   1.0882867
##   1.0169080
##   0.9766609
##   0.9549155
##   0.9390700
##   0.9293287
##   0.9237789
##   0.9191153
##   0.9158142
##   0.9133948
##   0.9106731
##   0.9090327
##   0.9074102
##   0.9063222
##   0.9053007
##   0.9046468
##   0.9036279
##   0.9030691
##   0.9026707
##   1.1052381
##   1.0312196
##   0.9925489
##   0.9710257
##   0.9583923
##   0.9511555
##   0.9460285
##   0.9452072
##   0.9430458
##   0.9426622
##   0.9426144
##   0.9416499
##   0.9408055
##   0.9412515
##   0.9416895
##   0.9417553
##   0.9420172
##   0.9419374
##   0.9416010
##   0.9650825
##   0.9605987
##   0.9561264
##   0.9572718
##   0.9579809
##   0.9599097
##   0.9621161
##   0.9652683
##   0.9671470
##   0.9684269
##   0.9706056
##   0.9715003
##   0.9721887
##   0.9743213
##   0.9755779
##   0.9763642
##   0.9778163
##   0.9785409
##   0.9785893
##   0.9637605
##   0.9661677
##   0.9684562
##   0.9788428
##   0.9860489
##   0.9868310
##   0.9922677
##   0.9958767
##   0.9985568
##   1.0030230
##   1.0048913
##   1.0072345
##   1.0089213
##   1.0107214
##   1.0131644
##   1.0140346
##   1.0147737
##   1.0158271
##   1.0173670
##   0.9626265
##   0.9624324
##   0.9602410
##   0.9592744
##   0.9594281
##   0.9589471
##   0.9584877
##   0.9584212
##   0.9582477
##   0.9582920
##   0.9582773
##   0.9582411
##   0.9582107
##   0.9581893
##   0.9581956
##   0.9581994
##   0.9581984
##   0.9581957
##   0.9581997
##   0.9672163
##   0.9748987
##   0.9793514
##   0.9824668
##   0.9843933
##   0.9852521
##   0.9854203
##   0.9860380
##   0.9865861
##   0.9868320
##   0.9872792
##   0.9875621
##   0.9878971
##   0.9880899
##   0.9885050
##   0.9888291
##   0.9890563
##   0.9892266
##   0.9894029
##   0.9566232
##   0.9542732
##   0.9525324
##   0.9512686
##   0.9507258
##   0.9504082
##   0.9501267
##   0.9499844
##   0.9499349
##   0.9499153
##   0.9498459
##   0.9498417
##   0.9498493
##   0.9498470
##   0.9498320
##   0.9498307
##   0.9498257
##   0.9498195
##   0.9498215
##   0.9625243
##   0.9643228
##   0.9650804
##   0.9666755
##   0.9673584
##   0.9682105
##   0.9689919
##   0.9694512
##   0.9699356
##   0.9703704
##   0.9710086
##   0.9714197
##   0.9720841
##   0.9723179
##   0.9727676
##   0.9729902
##   0.9731977
##   0.9734864
##   0.9737665
##   0.9504910
##   0.9440653
##   0.9428057
##   0.9421130
##   0.9417730
##   0.9414214
##   0.9413808
##   0.9414553
##   0.9415004
##   0.9414834
##   0.9414622
##   0.9414868
##   0.9414980
##   0.9414919
##   0.9414894
##   0.9414935
##   0.9415040
##   0.9415050
##   0.9415082
##   0.9705664
##   0.9730958
##   0.9731751
##   0.9722419
##   0.9726411
##   0.9735421
##   0.9740486
##   0.9744827
##   0.9746127
##   0.9751904
##   0.9756646
##   0.9760272
##   0.9761953
##   0.9764737
##   0.9768073
##   0.9769678
##   0.9772846
##   0.9773485
##   0.9775540
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were n.trees = 1000, interaction.depth =
##  7, shrinkage = 0.01 and n.minobsinnode = 5.
# Ranked predictor importance for the boosted-tree fit (top score shown as 100).
caret::varImp(object = gb_Model)
## gbm variable importance
## 
##   only 20 most important variables shown (out of 57)
## 
##                        Overall
## ManufacturingProcess32 100.000
## ManufacturingProcess17  27.047
## ManufacturingProcess13  16.090
## BiologicalMaterial12    14.913
## ManufacturingProcess06  14.888
## ManufacturingProcess09  13.583
## BiologicalMaterial03    12.066
## ManufacturingProcess31  11.651
## BiologicalMaterial09     8.734
## BiologicalMaterial11     8.325
## ManufacturingProcess24   6.523
## ManufacturingProcess01   6.310
## BiologicalMaterial05     6.292
## ManufacturingProcess04   5.900
## ManufacturingProcess14   5.586
## BiologicalMaterial08     5.571
## BiologicalMaterial04     5.468
## BiologicalMaterial06     5.467
## ManufacturingProcess05   5.274
## ManufacturingProcess11   4.948
# Predict the test predictors with the boosted model, then report test-set
# RMSE, R-squared and MAE against the observed test response.
gb_Pred <- predict(gb_Model, test_X2)
caret::postResample(gb_Pred, test_Y2)
##      RMSE  Rsquared       MAE 
## 1.1202764 0.6565479 0.8919856

DISCUSSION:

BOOST

RMSE was used to select the optimal model (smallest value). The final values used for the model were n.trees = 1000, interaction.depth = 7, shrinkage = 0.01 and n.minobsinnode = 5, giving a resampled RMSE of 1.197772.

Test set: RMSE = 1.1202764, Rsquared = 0.6565479, MAE = 0.8919856

DISCUSSION:

BOOST

Resampled RMSE = 1.197772

Test set: RMSE = 1.1202764, Rsquared = 0.6565479, MAE = 0.8919856

# Cubist ----
# Seed the RNG before training so the resampling draws are repeatable, then
# tune a Cubist model; no tuneGrid is supplied, so caret picks the candidate
# committees/neighbors values itself.
set.seed(200)
cubist_Model <- caret::train(train_X2, train_Y2, method = "cubist")
## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion

## Warning in cubist.default(x, y, committees = param$committees, ...): NAs
## introduced by coercion
# Print the fitted caret object: resampling results for each
# committees/neighbors combination and the values chosen for the final model.
cubist_Model
## Cubist 
## 
## 132 samples
##  57 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 132, 132, 132, 132, 132, 132, ... 
## Resampling results across tuning parameters:
## 
##   committees  neighbors  RMSE      Rsquared   MAE      
##    1          0          1.914375  0.3209284  1.3469601
##    1          5          1.878481  0.3374995  1.3129553
##    1          9          1.885257  0.3330481  1.3186288
##   10          0          1.294170  0.5354825  0.9938812
##   10          5          1.266547  0.5538839  0.9683847
##   10          9          1.269738  0.5507295  0.9721896
##   20          0          1.232534  0.5685524  0.9510921
##   20          5          1.206580  0.5842537  0.9265917
##   20          9          1.210972  0.5808630  0.9305998
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were committees = 20 and neighbors = 5.
# Ranked predictor importance for the tuned Cubist fit (top score shown as 100).
caret::varImp(object = cubist_Model)
## cubist variable importance
## 
##   only 20 most important variables shown (out of 57)
## 
##                        Overall
## ManufacturingProcess32  100.00
## ManufacturingProcess17   64.84
## ManufacturingProcess28   56.04
## ManufacturingProcess39   40.66
## ManufacturingProcess33   37.36
## BiologicalMaterial06     28.57
## ManufacturingProcess09   26.37
## ManufacturingProcess04   23.08
## BiologicalMaterial02     19.78
## ManufacturingProcess01   15.38
## BiologicalMaterial03     15.38
## ManufacturingProcess24   15.38
## BiologicalMaterial11     14.29
## ManufacturingProcess27   13.19
## BiologicalMaterial12     13.19
## ManufacturingProcess06   12.09
## ManufacturingProcess13   10.99
## ManufacturingProcess29   10.99
## ManufacturingProcess11   10.99
## ManufacturingProcess31   10.99
# Predict the test predictors with the Cubist model, then report test-set
# RMSE, R-squared and MAE against the observed test response.
cubist_Pred <- predict(cubist_Model, test_X2)
caret::postResample(cubist_Pred, test_Y2)
##      RMSE  Rsquared       MAE 
## 0.8929243 0.7793957 0.6829956

DISCUSSION:

CUBIST: resampled RMSE = 1.206580 (committees = 20, neighbors = 5). Test set: RMSE = 0.8929243, Rsquared = 0.7793957, MAE = 0.6829956

DISCUSSION:

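Of the tree-based models (random forest, gradient boosting, Cubist), the one with the best test set performance is:

CUBIST: resampled RMSE = 1.2066, test RMSE = 0.8929. (Its resampled RMSE is slightly higher than the boosted model's 1.1978, but its test RMSE is clearly lower than the boosted model's 1.1203.)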

(b)

Which predictors are most important in the optimal tree-based regression model? Do either the biological or process variables dominate the list? How do the top 10 important predictors compare to the top 10 predictors from the optimal linear and non linear models?

# Cubist variable importance: predictors ranked by their Overall score
# (the top-ranked predictor is scaled to 100).
varImp(cubist_Model)
## cubist variable importance
## 
##   only 20 most important variables shown (out of 57)
## 
##                        Overall
## ManufacturingProcess32  100.00
## ManufacturingProcess17   64.84
## ManufacturingProcess28   56.04
## ManufacturingProcess39   40.66
## ManufacturingProcess33   37.36
## BiologicalMaterial06     28.57
## ManufacturingProcess09   26.37
## ManufacturingProcess04   23.08
## BiologicalMaterial02     19.78
## ManufacturingProcess24   15.38
## BiologicalMaterial03     15.38
## ManufacturingProcess01   15.38
## BiologicalMaterial11     14.29
## ManufacturingProcess27   13.19
## BiologicalMaterial12     13.19
## ManufacturingProcess06   12.09
## ManufacturingProcess29   10.99
## ManufacturingProcess11   10.99
## ManufacturingProcess13   10.99
## ManufacturingProcess31   10.99

DISCUSSION:

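Manufacturing process predictors dominate the list, taking 8 of the top 10 spots; the only biological predictors in the top 10 are BiologicalMaterial06 and BiologicalMaterial02 (BiologicalMaterial03 ties for 10th place at 15.38).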

Recall the nonlinear SVM variable importance:

##                        Overall
## ManufacturingProcess32  100.00
## ManufacturingProcess17   80.31
## BiologicalMaterial06     75.09
## ManufacturingProcess13   74.47
## BiologicalMaterial03     67.11
## ManufacturingProcess06   66.44
## ManufacturingProcess36   66.30
## BiologicalMaterial12     65.94
## ManufacturingProcess09   65.29
## BiologicalMaterial02     55.94
## ManufacturingProcess31   53.57
## ManufacturingProcess29   47.17
## ManufacturingProcess33   45.79
## BiologicalMaterial08     44.98
## ManufacturingProcess11   41.72
## ManufacturingProcess02   41.58
## BiologicalMaterial11     41.18
## BiologicalMaterial04     40.31
## BiologicalMaterial09     35.12
## BiologicalMaterial01     34.15

AND the linear Elastic net….

In both the nonlinear SVM and the linear elastic net, 6 of the top 10 predictors are manufacturing process variables, compared with 8 of the top 10 for Cubist.

AND the linear Elastic net…. The top 20 from the linear model elastnet were: Overall Elastic net var importance ManufacturingProcess13 100.00 ManufacturingProcess32 93.67 BiologicalMaterial03 92.86 BiologicalMaterial06 86.68 ManufacturingProcess17 80.34 BiologicalMaterial12 76.76 ManufacturingProcess09 76.15 ManufacturingProcess36 75.95 ManufacturingProcess06 63.29 BiologicalMaterial02 59.13 ManufacturingProcess11 54.38 BiologicalMaterial11 53.65 ManufacturingProcess31 51.81 BiologicalMaterial04 48.83 BiologicalMaterial09 47.43 ManufacturingProcess18 43.88 ManufacturingProcess30 42.31 BiologicalMaterial0

(c)

Plot the optimal single tree with the distribution of yield in the terminal nodes. Does this view of the data provide additional knowledge about the biological or process predictors and their relationship with yield?

# Tune a single CART regression tree on the training data. The seed is fixed
# so the resampling is reproducible; caret evaluates 10 candidate cp values,
# and the control object passed along to rpart sets maxdepth to 2.
set.seed(200)
st_Model <- train(
  x = train_X2,
  y = train_Y2,
  method = "rpart",
  tuneLength = 10,
  control = rpart.control(maxdepth = 2)
)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
## There were missing values in resampled performance measures.
# Print the resampling results for each candidate cp and the value selected.
st_Model
## CART 
## 
## 132 samples
##  57 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 132, 132, 132, 132, 132, 132, ... 
## Resampling results across tuning parameters:
## 
##   cp          RMSE      Rsquared   MAE     
##   0.01329166  1.524074  0.3550603  1.213918
##   0.01915705  1.524074  0.3550603  1.213918
##   0.02096592  1.524074  0.3550603  1.213918
##   0.02929826  1.524074  0.3550603  1.213918
##   0.03394084  1.524074  0.3550603  1.213918
##   0.04299920  1.522016  0.3566972  1.213177
##   0.04755934  1.522458  0.3563845  1.211768
##   0.06066735  1.529012  0.3534197  1.215071
##   0.09672549  1.541091  0.3375758  1.221246
##   0.39433963  1.698655  0.2806679  1.366262
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.0429992.
# Plot the final tree stored on the caret fit (the model at the selected cp).
rpart.plot::rpart.plot(st_Model$finalModel)

##DISCUSSION:

ManufacturingProcess32 ranks first in the Cubist and SVM importance lists and second in the elastic net list. ManufacturingProcess17 ranks second for Cubist and SVM and fifth for the elastic net.

Overall, the analysis suggests that certain manufacturing process variables are stronger predictors of yield than the biological material variables.