8.1, 8.2, 8.3, and 8.7
8.1
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages -------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 0.8.5
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## -- Conflicts ----------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(mlbench)
## Warning: package 'mlbench' was built under R version 3.6.3
# Simulate the Friedman 1 benchmark: 200 observations with noise sd = 1.
set.seed(200)
simulated <- mlbench.friedman1(200, sd = 1)
# Bind the predictor matrix and the response into a single data frame; the
# response lands in the last column, which is renamed to "y".
simulated <- as.data.frame(cbind(simulated$x, simulated$y))
names(simulated)[length(simulated)] <- "y"
a
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.6.2
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(caret)
## Warning: package 'caret' was built under R version 3.6.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
# Fit a 1000-tree random forest on all predictors and print the unscaled
# variable importance reported by caret::varImp().
model1 <- randomForest(
  y ~ .,
  data = simulated,
  ntree = 1000,
  importance = TRUE
)
rfImp1 <- varImp(model1, scale = FALSE)
print(rfImp1)
## Overall
## V1 8.732235404
## V2 6.415369387
## V3 0.763591825
## V4 7.615118809
## V5 2.023524577
## V6 0.165111172
## V7 -0.005961659
## V8 -0.166362581
## V9 -0.095292651
## V10 -0.074944788
No. The random forest did not make significant use of the predictors V6-V10; their importance scores are all close to zero.
b
# Add a noisy copy of V1 (standard normal noise scaled by 0.1) and report
# how strongly it correlates with the original predictor.
simulated[["duplicate1"]] <- simulated[["V1"]] + rnorm(200) * 0.1
with(simulated, cor(duplicate1, V1))
## [1] 0.9460206
# Refit the random forest now that duplicate1 is among the predictors and
# print the unscaled importance again.
model2 <- randomForest(
  y ~ .,
  data = simulated,
  ntree = 1000,
  importance = TRUE
)
rfImp2 <- varImp(model2, scale = FALSE)
print(rfImp2)
## Overall
## V1 5.69119973
## V2 6.06896061
## V3 0.62970218
## V4 7.04752238
## V5 1.87238438
## V6 0.13569065
## V7 -0.01345645
## V8 -0.04370565
## V9 0.00840438
## V10 0.02894814
## duplicate1 4.28331581
The importance score for V1 decreased.
# Add a second noisy copy of V1, built the same way as duplicate1, and
# report its correlation with V1.
simulated[["duplicate2"]] <- simulated[["V1"]] + rnorm(200) * 0.1
with(simulated, cor(duplicate2, V1))
## [1] 0.9408631
# Refit the random forest with both duplicates present and print the
# unscaled importance.
model3 <- randomForest(
  y ~ .,
  data = simulated,
  ntree = 1000,
  importance = TRUE
)
rfImp3 <- varImp(model3, scale = FALSE)
print(rfImp3)
## Overall
## V1 4.91687329
## V2 6.52816504
## V3 0.58711552
## V4 7.04870917
## V5 2.03115561
## V6 0.14213148
## V7 0.10991985
## V8 -0.08405687
## V9 -0.01075028
## V10 0.09230576
## duplicate1 3.80068234
## duplicate2 1.87721959
V1 decreases in importance even more when a third correlated variable is added.
c
library(party)
## Warning: package 'party' was built under R version 3.6.3
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Warning: package 'modeltools' was built under R version 3.6.3
## Loading required package: stats4
## Loading required package: strucchange
## Warning: package 'strucchange' was built under R version 3.6.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.6.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Warning: package 'sandwich' was built under R version 3.6.3
##
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
##
## boundary
# Build the comparison data sets: `simulated0` has neither duplicate, while
# `simulated1` drops only duplicate1 (so the duplicate it keeps is
# duplicate2). `simulated` itself still holds both duplicates.
simulated0 <- select(simulated, -duplicate1, -duplicate2)
simulated1 <- select(simulated, -duplicate1)
# cforest() fit on the original ten predictors, followed by its importance
# from party::varimp() with default arguments.
rmodel <- cforest(y ~ ., data = simulated0)
rimp <- varimp(rmodel)
print(rimp)
## V1 V2 V3 V4 V5 V6
## 9.07731187 6.72230284 0.01710830 8.21479567 1.82629310 -0.02010569
## V7 V8 V9 V10
## -0.03954410 -0.04248908 -0.03733885 -0.03582182
# Same cforest()/varimp() analysis on the data set with one duplicate.
rmodel1 <- cforest(y ~ ., data = simulated1)
rimp1 <- varimp(rmodel1)
print(rimp1)
## V1 V2 V3 V4 V5 V6
## 6.98131946 6.46276618 0.06434515 8.02122913 1.90020973 -0.01740631
## V7 V8 V9 V10 duplicate2
## 0.05513301 -0.03353919 -0.02035847 -0.03949474 2.45187870
# Same cforest()/varimp() analysis on the data set with both duplicates.
rmodel2 <- cforest(y ~ ., data = simulated)
rimp2 <- varimp(rmodel2)
print(rimp2)
## V1 V2 V3 V4 V5
## 4.3086158967 5.8623288533 0.0033964540 7.1775874295 1.6497323006
## V6 V7 V8 V9 V10
## 0.0068533939 -0.0002529274 -0.0082908353 -0.0172499235 -0.0383320626
## duplicate1 duplicate2
## 4.2677299093 1.0548503400
Yes, these patterns are the same as the traditional random forest model.
d
library(gbm)
## Warning: package 'gbm' was built under R version 3.6.3
## Loaded gbm 2.1.8
# Tune boosted trees with caret over a grid of interaction depth, number of
# trees and shrinkage (minimum node size fixed at 10), once per data set.
set.seed(100)
gbmGrid <- expand.grid(
  interaction.depth = seq(1, 7, by = 2),
  n.trees = seq(100, 1000, by = 50),
  shrinkage = c(0.01, 0.1),
  n.minobsinnode = 10
)
# Only one set.seed() call precedes the three fits, so their order is kept.
gbmTune <- train(
  y ~ .,
  data = simulated0,
  method = "gbm",
  tuneGrid = gbmGrid,
  verbose = FALSE
)
gbmTune1 <- train(
  y ~ .,
  data = simulated1,
  method = "gbm",
  tuneGrid = gbmGrid,
  verbose = FALSE
)
gbmTune2 <- train(
  y ~ .,
  data = simulated,
  method = "gbm",
  tuneGrid = gbmGrid,
  verbose = FALSE
)
# Mind the offset in names: gimp1 belongs to gbmTune (simulated0), gimp2 to
# gbmTune1 (simulated1) and gimp3 to gbmTune2 (simulated).
gimp1 <- varImp(gbmTune)
gimp2 <- varImp(gbmTune1)
gimp3 <- varImp(gbmTune2)
print(gimp1)
## gbm variable importance
##
## Overall
## V1 100.0000
## V2 92.8016
## V4 92.1320
## V5 36.7325
## V3 24.6338
## V6 4.3524
## V7 3.2761
## V10 0.5930
## V9 0.4913
## V8 0.0000
# Importance from gbmTune1, the boosted model fitted on simulated1.
gimp2
## gbm variable importance
##
## Overall
## V4 100.0000
## V1 82.6359
## V2 72.8261
## V5 32.6023
## V3 30.8509
## V7 3.4855
## duplicate2 1.9287
## V8 1.8199
## V6 1.2499
## V9 0.1639
## V10 0.0000
# Importance from gbmTune2, the boosted model fitted on simulated.
gimp3
## gbm variable importance
##
## Overall
## V4 100.00000
## V2 75.08507
## V1 60.90916
## V3 37.56514
## V5 37.45434
## duplicate2 20.01854
## duplicate1 18.17953
## V6 2.24111
## V7 1.65179
## V10 1.38019
## V9 0.01674
## V8 0.00000
The pattern is the same for the boosted model.
library(Cubist)
## Warning: package 'Cubist' was built under R version 3.6.3
# Fit Cubist through caret on each of the three data sets; no tuning grid
# is supplied, so caret's defaults for method = "cubist" apply.
cubistMod <- train(
  y ~ .,
  data = simulated0,
  method = "cubist"
)
cubistMod1 <- train(
  y ~ .,
  data = simulated1,
  method = "cubist"
)
cubistMod2 <- train(
  y ~ .,
  data = simulated,
  method = "cubist"
)
# Mind the offset in names: cimp1 belongs to cubistMod (simulated0), cimp2
# to cubistMod1 (simulated1) and cimp3 to cubistMod2 (simulated).
cimp1 <- varImp(cubistMod)
cimp2 <- varImp(cubistMod1)
cimp3 <- varImp(cubistMod2)
print(cimp1)
## cubist variable importance
##
## Overall
## V1 100.00
## V2 75.69
## V4 68.06
## V3 58.33
## V5 55.56
## V6 15.28
## V8 0.00
## V9 0.00
## V10 0.00
## V7 0.00
# Importance from cubistMod1, the Cubist model fitted on simulated1.
cimp2
## cubist variable importance
##
## Overall
## V2 100.0000
## V1 90.0709
## V4 76.5957
## V3 58.1560
## V5 41.8440
## duplicate2 24.8227
## V8 14.8936
## V6 12.0567
## V7 1.4184
## V10 0.7092
## V9 0.0000
# Importance from cubistMod2, the Cubist model fitted on simulated.
cimp3
## cubist variable importance
##
## Overall
## V2 100.000
## V1 77.698
## V4 71.942
## V5 54.676
## V3 46.043
## duplicate2 35.971
## duplicate1 35.971
## V6 14.388
## V8 4.317
## V9 0.000
## V10 0.000
## V7 0.000
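The pattern is weaker with the Cubist model. V1's scaled importance still declines as duplicates are added (100, then 90.1, then 77.7), but V1 remains one of the top two predictors, and the duplicates receive only moderate importance (roughly 25 to 36).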
8.2
# Simulate four predictors that differ only in granularity (10001, 1001,
# 101 and 11 distinct possible values on [0, 1]) plus a response that is
# sampled independently of all of them.
set.seed(200)
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently switch to sampling without replacement.
x1 <- sample(0:10000 / 10000, 200, replace = TRUE)
x2 <- sample(0:1000 / 1000, 200, replace = TRUE)
x3 <- sample(0:100 / 100, 200, replace = TRUE)
x4 <- sample(0:10 / 10, 200, replace = TRUE)
y <- sample(0:10000 / 10000, 200, replace = TRUE)
df <- data.frame(x1, x2, x3, x4, y)
# Random forest on the simulated data, where y was sampled independently
# of x1-x4, followed by the importance reported by caret::varImp().
smodel <- randomForest(y ~ ., data = df)
print(varImp(smodel))
## Overall
## x1 4.360754
## x2 4.093097
## x3 3.983100
## x4 2.425923
The variable that is the most granular, x1, is given the highest importance.
8.3
With a higher learning rate, each tree contributes more to the fit, so the first few trees, and the predictors they split on, dominate the model; importance is concentrated in fewer variables. A higher bagging fraction means each tree is built on more of the data, so there is less variety between the samples and the trees tend to select the same predictors.
The model with the lower learning rate and lower bagging fraction is likely to generalize better: it has more variety between trees and is less likely to overfit than the model with the higher learning rate and higher bagging fraction.
As trees are allowed to grow deeper, more predictors are considered for each tree's splitting choices. This spreads the variable importance across more variables rather than concentrating very high importance in a few predictors.
8.7
library(tidymodels)
## Warning: package 'tidymodels' was built under R version 3.6.3
## -- Attaching packages ------------------------------------------------------------------------- tidymodels 0.1.0 --
## v broom 0.5.6 v rsample 0.0.6
## v dials 0.0.6 v tune 0.1.0
## v infer 0.5.1 v workflows 0.1.1
## v parsnip 0.1.4 v yardstick 0.0.6
## v recipes 0.1.12
## Warning: package 'broom' was built under R version 3.6.3
## Warning: package 'dials' was built under R version 3.6.3
## Warning: package 'infer' was built under R version 3.6.3
## Warning: package 'parsnip' was built under R version 3.6.3
## Warning: package 'recipes' was built under R version 3.6.3
## Warning: package 'rsample' was built under R version 3.6.3
## Warning: package 'tune' was built under R version 3.6.3
## Warning: package 'workflows' was built under R version 3.6.3
## Warning: package 'yardstick' was built under R version 3.6.3
## -- Conflicts ---------------------------------------------------------------------------- tidymodels_conflicts() --
## x randomForest::combine() masks dplyr::combine()
## x scales::discard() masks purrr::discard()
## x dplyr::filter() masks stats::filter()
## x parsnip::fit() masks party::fit(), modeltools::fit()
## x recipes::fixed() masks stringr::fixed()
## x dplyr::lag() masks stats::lag()
## x caret::lift() masks purrr::lift()
## x dials::margin() masks randomForest::margin(), ggplot2::margin()
## x tune::parameters() masks dials::parameters(), modeltools::parameters()
## x yardstick::precision() masks caret::precision()
## x yardstick::recall() masks caret::recall()
## x yardstick::sensitivity() masks caret::sensitivity()
## x yardstick::spec() masks readr::spec()
## x yardstick::specificity() masks caret::specificity()
## x recipes::step() masks stats::step()
library(rpart)
##
## Attaching package: 'rpart'
## The following object is masked from 'package:dials':
##
## prune
library(AppliedPredictiveModeling)
## Warning: package 'AppliedPredictiveModeling' was built under R version
## 3.6.3
# Load the chemical manufacturing data, impute all predictors with
# step_bagimpute(), then hold out 20% of the rows as a test set.
# NOTE(review): the imputation recipe is prepped on the full data set
# before the train/test split, so test rows take part in fitting the
# imputation. Consider splitting first and prepping on the training set
# only. This is left unchanged so the recorded results stay valid.
data("ChemicalManufacturingProcess")
set.seed(100)
cmp_data <- as.data.frame(ChemicalManufacturingProcess)
cmp <- recipe(Yield ~ ., data = cmp_data) %>%
  step_bagimpute(all_predictors()) %>%
  prep() %>%
  juice()
splits <- initial_split(cmp, prop = 0.8)
cmptest <- testing(splits)
cmptrain <- training(splits)
# Fit three models on the training set: a random forest (cmpmodel), a
# single rpart tree (cmpmodel1) and boosted trees tuned over the gbmGrid
# defined earlier in this file (cmpmodel2).
cmpmodel <- randomForest(Yield ~ ., data = cmptrain)
cmpmodel1 <- rpart(Yield ~ ., data = cmptrain)
cmpmodel2 <- train(
  Yield ~ .,
  data = cmptrain,
  method = "gbm",
  tuneGrid = gbmGrid,
  verbose = FALSE
)
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
## Warning in (function (x, y, offset = NULL, misc = NULL, distribution =
## "bernoulli", : variable 7: BiologicalMaterial07 has no variation.
# Fourth model: Cubist fitted through caret with no tuning grid supplied.
cmpmodel3 <- train(Yield ~ ., data = cmptrain, method = "cubist")
# Test-set performance of the random forest (cmpmodel).
cmppred <- predict(cmpmodel, newdata = cmptest)
postResample(pred = cmppred, obs = cmptest[["Yield"]])
## RMSE Rsquared MAE
## 1.1922903 0.4522276 0.8873345
# Test-set performance of the single rpart tree (cmpmodel1).
cmppred1 <- predict(cmpmodel1, newdata = cmptest)
postResample(pred = cmppred1, obs = cmptest[["Yield"]])
## RMSE Rsquared MAE
## 1.4802863 0.3535723 1.1508027
# Test-set performance of the tuned boosted-tree model (cmpmodel2).
cmppred2 <- predict(cmpmodel2, newdata = cmptest)
postResample(pred = cmppred2, obs = cmptest[["Yield"]])
## RMSE Rsquared MAE
## 1.2198424 0.4701204 0.8972471
# Test-set performance of the Cubist model (cmpmodel3).
cmppred3 <- predict(cmpmodel3, newdata = cmptest)
postResample(pred = cmppred3, obs = cmptest[["Yield"]])
## RMSE Rsquared MAE
## 1.6205427 0.2481667 1.1798649
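The random forest has the lowest test-set RMSE (1.19), and the boosted tree model has the highest R^2 (0.47, versus 0.45 for the random forest). The Cubist model performs worst on both metrics (RMSE 1.62, R^2 0.25).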
# Variable importance for the Cubist model fitted on the training set.
varImp(cmpmodel3)
## cubist variable importance
##
## only 20 most important variables shown (out of 57)
##
## Overall
## ManufacturingProcess13 100.00
## ManufacturingProcess32 83.52
## ManufacturingProcess17 59.34
## BiologicalMaterial06 53.85
## ManufacturingProcess29 39.56
## ManufacturingProcess09 37.36
## ManufacturingProcess04 35.16
## ManufacturingProcess33 31.87
## BiologicalMaterial03 30.77
## BiologicalMaterial11 30.77
## BiologicalMaterial05 24.18
## ManufacturingProcess37 21.98
## ManufacturingProcess27 19.78
## ManufacturingProcess14 18.68
## ManufacturingProcess10 16.48
## ManufacturingProcess28 16.48
## BiologicalMaterial02 16.48
## BiologicalMaterial08 15.38
## ManufacturingProcess26 14.29
## ManufacturingProcess15 13.19
The top spots are much more varied than in the linear and nonlinear models, which gave more importance to the manufacturing processes than to the biological materials.
# Print the rules, committee models and attribute usage of the Cubist fit.
summary(cmpmodel3)
##
## Call:
## cubist.default(x = x, y = y, committees = param$committees)
##
##
## Cubist [Release 2.07 GPL Edition] Mon Nov 30 01:27:38 2020
## ---------------------------------
##
## Target attribute `outcome'
##
## Read 141 cases (58 attributes) from undefined.data
##
## Model 1:
##
## Rule 1/1: [141 cases, mean 40.287, range 35.25 to 46.34, est err 0.898]
##
## outcome = -54.273 + 0.265 ManufacturingProcess32
## - 0.85 ManufacturingProcess13 + 0.228 BiologicalMaterial03
## + 0.0152 ManufacturingProcess14 + 0.95 ManufacturingProcess10
## - 0.174 BiologicalMaterial06 - 0.259 ManufacturingProcess33
## - 0.068 ManufacturingProcess28 + 0.46 ManufacturingProcess29
##
## Model 2:
##
## Rule 2/1: [64 cases, mean 39.687, range 35.25 to 43.88, est err 1.026]
##
## if
## ManufacturingProcess13 > 34.7
## then
## outcome = 162.923 + 0.346 ManufacturingProcess32
## - 0.0365 ManufacturingProcess25
##
## Rule 2/2: [77 cases, mean 40.786, range 37.39 to 46.34, est err 0.999]
##
## if
## ManufacturingProcess13 <= 34.7
## then
## outcome = 22.477 - 2.04 ManufacturingProcess13
## + 0.0218 ManufacturingProcess15 - 0.258 ManufacturingProcess33
## - 0.0099 ManufacturingProcess14 + 0.093 ManufacturingProcess32
## + 0.115 BiologicalMaterial06 - 0.074 ManufacturingProcess24
##
## Model 3:
##
## Rule 3/1: [18 cases, mean 38.746, range 36.77 to 41.42, est err 1.379]
##
## if
## ManufacturingProcess09 <= 44.92
## ManufacturingProcess17 <= 34.9
## then
## outcome = -9.06 + 0.266 BiologicalMaterial06
## + 0.14 ManufacturingProcess24 + 0.131 BiologicalMaterial03
## - 0.121 BiologicalMaterial02 + 0.51 ManufacturingProcess29
## + 0.06 ManufacturingProcess32 - 0.088 ManufacturingProcess33
## + 0.0033 ManufacturingProcess14 + 0.22 ManufacturingProcess10
## - 0.1 ManufacturingProcess13 - 0.0016 ManufacturingProcess20
## - 0.017 ManufacturingProcess28 + 0.012 ManufacturingProcess04
## - 0.05 ManufacturingProcess17 + 0.03 BiologicalMaterial04
##
## Rule 3/2: [37 cases, mean 39.893, range 35.25 to 43.44, est err 0.894]
##
## if
## BiologicalMaterial06 <= 55.95
## ManufacturingProcess17 > 34.9
## then
## outcome = -37.171 + 0.331 BiologicalMaterial06
## + 0.303 BiologicalMaterial03 - 0.266 BiologicalMaterial02
## + 0.162 ManufacturingProcess32 + 1.17 ManufacturingProcess29
## - 0.52 ManufacturingProcess17 - 0.119 BiologicalMaterial11
## - 0.241 ManufacturingProcess33 + 0.0091 ManufacturingProcess14
## + 0.6 ManufacturingProcess10 - 0.27 ManufacturingProcess13
## - 0.0044 ManufacturingProcess20 + 0.56 BiologicalMaterial09
## - 0.045 ManufacturingProcess28 + 0.033 ManufacturingProcess04
##
## Rule 3/3: [134 cases, mean 40.237, range 35.25 to 44.35, est err 0.866]
##
## if
## BiologicalMaterial06 <= 55.95
## then
## outcome = -118.025 + 0.512 BiologicalMaterial03
## - 0.455 BiologicalMaterial02 + 0.293 ManufacturingProcess32
## + 2.14 ManufacturingProcess29 - 0.418 ManufacturingProcess33
## + 0.0159 ManufacturingProcess14 + 0.94 ManufacturingProcess10
## - 0.59 ManufacturingProcess13 - 0.0079 ManufacturingProcess20
## - 0.08 ManufacturingProcess28 + 0.063 ManufacturingProcess04
## - 0.05 ManufacturingProcess17 + 0.03 BiologicalMaterial04
##
## Rule 3/4: [7 cases, mean 41.256, range 38.35 to 46.34, est err 2.495]
##
## if
## BiologicalMaterial06 > 55.95
## then
## outcome = 236 - 3.491 BiologicalMaterial06 + 2.75 ManufacturingProcess38
##
## Model 4:
##
## Rule 4/1: [64 cases, mean 39.687, range 35.25 to 43.88, est err 1.048]
##
## if
## ManufacturingProcess13 > 34.7
## then
## outcome = 169.111 + 0.329 ManufacturingProcess32
## - 0.0372 ManufacturingProcess25
##
## Rule 4/2: [77 cases, mean 40.786, range 37.39 to 46.34, est err 0.969]
##
## if
## ManufacturingProcess13 <= 34.7
## then
## outcome = 40.476 - 1.88 ManufacturingProcess13
## + 0.0094 ManufacturingProcess15 - 0.078 ManufacturingProcess24
## + 0.072 ManufacturingProcess32 - 0.0008 ManufacturingProcess25
##
## Model 5:
##
## Rule 5/1: [141 cases, mean 40.287, range 35.25 to 46.34, est err 1.061]
##
## outcome = -121.328 + 0.376 BiologicalMaterial03
## + 0.71 ManufacturingProcess09 - 0.254 BiologicalMaterial02
## + 1.25 BiologicalMaterial08 - 0.159 BiologicalMaterial11
## + 0.0162 ManufacturingProcess26 + 0.358 BiologicalMaterial05
## + 0.131 ManufacturingProcess32 - 0.134 ManufacturingProcess28
## + 0.3 ManufacturingProcess39 - 0.95 ManufacturingProcess37
## - 0.18 ManufacturingProcess17
##
## Model 6:
##
## Rule 6/1: [57 cases, mean 39.646, range 35.25 to 43.88, est err 1.022]
##
## if
## ManufacturingProcess13 > 34.8
## then
## outcome = 161.733 - 1.64 ManufacturingProcess31
## - 3.11 ManufacturingProcess29 + 0.36 ManufacturingProcess32
##
## Rule 6/2: [61 cases, mean 40.103, range 37.39 to 43.12, est err 1.099]
##
## if
## BiologicalMaterial11 <= 151
## ManufacturingProcess13 <= 34.8
## then
## outcome = 92.341 - 1.93 ManufacturingProcess13
## + 0.492 ManufacturingProcess01 + 0.034 BiologicalMaterial11
## + 0.024 ManufacturingProcess32 - 0.026 BiologicalMaterial03
##
## Rule 6/3: [84 cases, mean 40.722, range 37.39 to 46.34, est err 1.082]
##
## if
## ManufacturingProcess13 <= 34.8
## then
## outcome = 78.409 - 2.03 ManufacturingProcess13
## + 0.153 BiologicalMaterial11 + 0.107 ManufacturingProcess32
## - 0.117 BiologicalMaterial03
##
## Model 7:
##
## Rule 7/1: [14 cases, mean 39.048, range 37.39 to 42.31, est err 1.230]
##
## if
## BiologicalMaterial05 <= 16.3
## then
## outcome = 41.064 - 1.26 ManufacturingProcess13
## + 0.268 BiologicalMaterial06 + 17.9 ManufacturingProcess03
##
## Rule 7/2: [26 cases, mean 39.798, range 36.77 to 44.35, est err 1.664]
##
## if
## BiologicalMaterial05 > 16.3
## BiologicalMaterial06 <= 55.95
## BiologicalMaterial10 <= 2.63
## ManufacturingProcess17 <= 34.9
## ManufacturingProcess27 > 4580.784
## then
## outcome = -78.76 - 7.5 BiologicalMaterial10
## - 2.25 ManufacturingProcess17 + 0.0515 ManufacturingProcess27
## - 0.864 BiologicalMaterial05 + 0.336 BiologicalMaterial06
## - 0.33 ManufacturingProcess11 + 0.33 ManufacturingProcess29
## - 0.0052 ManufacturingProcess25
## - 0.0027 ManufacturingProcess20 - 0.19 BiologicalMaterial08
## + 0.07 ManufacturingProcess09 + 0.02 ManufacturingProcess32
## - 0.035 ManufacturingProcess33 + 0.01 ManufacturingProcess04
## - 0.07 ManufacturingProcess37
##
## Rule 7/3: [37 cases, mean 39.893, range 35.25 to 43.44, est err 1.207]
##
## if
## BiologicalMaterial06 <= 55.95
## ManufacturingProcess17 > 34.9
## then
## outcome = -124.071 + 1.95 ManufacturingProcess31
## + 3.92 ManufacturingProcess29 - 1.92 ManufacturingProcess17
## + 0.296 BiologicalMaterial06
##
## Rule 7/4: [76 cases, mean 40.059, range 35.25 to 44.35, est err 1.308]
##
## if
## BiologicalMaterial05 > 16.3
## BiologicalMaterial06 <= 55.95
## ManufacturingProcess27 > 4580.784
## then
## outcome = 16.883 + 0.304 BiologicalMaterial06
## - 1.71 ManufacturingProcess37 + 0.0147 ManufacturingProcess27
## - 0.62 ManufacturingProcess11 + 0.25 ManufacturingProcess09
## + 0.6 ManufacturingProcess29 - 0.01 ManufacturingProcess25
## - 0.0054 ManufacturingProcess20 - 0.22 ManufacturingProcess17
## - 0.38 BiologicalMaterial08 - 0.127 BiologicalMaterial05
## + 0.36 BiologicalMaterial10 + 0.03 ManufacturingProcess32
## - 0.052 ManufacturingProcess33 + 0.014 ManufacturingProcess04
##
## Rule 7/5: [83 cases, mean 40.590, range 36.77 to 44.35, est err 1.228]
##
## if
## BiologicalMaterial05 > 16.3
## BiologicalMaterial06 <= 55.95
## ManufacturingProcess17 <= 34.9
## then
## outcome = 75.978 - 2.34 ManufacturingProcess11
## + 0.0344 ManufacturingProcess27
## - 0.0409 ManufacturingProcess25 + 2.41 ManufacturingProcess29
## + 0.336 BiologicalMaterial06 - 0.0221 ManufacturingProcess20
## - 1.69 BiologicalMaterial08 + 1.44 BiologicalMaterial10
## + 0.39 ManufacturingProcess09 - 0.309 BiologicalMaterial05
## + 0.071 ManufacturingProcess04 + 0.073 ManufacturingProcess32
##
## Rule 7/6: [7 cases, mean 41.256, range 38.35 to 46.34, est err 1.913]
##
## if
## BiologicalMaterial06 > 55.95
## then
## outcome = 380.969 - 2.941 BiologicalMaterial06
## - 0.184 ManufacturingProcess04
##
## Model 8:
##
## Rule 8/1: [64 cases, mean 39.687, range 35.25 to 43.88, est err 1.145]
##
## if
## ManufacturingProcess13 > 34.7
## then
## outcome = 254.919 + 0.391 ManufacturingProcess32
## - 0.0381 ManufacturingProcess26 - 0.66 ManufacturingProcess31
##
## Rule 8/2: [77 cases, mean 40.786, range 37.39 to 46.34, est err 0.972]
##
## if
## ManufacturingProcess13 <= 34.7
## then
## outcome = 78.005 - 2.02 ManufacturingProcess13
## + 0.094 BiologicalMaterial11 + 0.51 ManufacturingProcess29
## + 0.043 ManufacturingProcess32
##
## Model 9:
##
## Rule 9/1: [37 cases, mean 39.893, range 35.25 to 43.44, est err 1.098]
##
## if
## BiologicalMaterial06 <= 55.95
## ManufacturingProcess17 > 34.9
## then
## outcome = 80.145 + 0.704 BiologicalMaterial06
## - 1.3 ManufacturingProcess17 - 0.189 BiologicalMaterial11
##
## Rule 9/2: [134 cases, mean 40.237, range 35.25 to 44.35, est err 1.089]
##
## if
## BiologicalMaterial06 <= 55.95
## then
## outcome = -122.963 + 0.319 ManufacturingProcess32
## - 1.06 ManufacturingProcess17 - 0.431 ManufacturingProcess33
## + 0.155 ManufacturingProcess04 + 1.24 ManufacturingProcess29
## + 0.137 BiologicalMaterial06
##
## Rule 9/3: [7 cases, mean 41.256, range 38.35 to 46.34, est err 2.275]
##
## if
## BiologicalMaterial06 > 55.95
## then
## outcome = 224.831 - 3.298 BiologicalMaterial06
## + 2.85 ManufacturingProcess38
##
## Model 10:
##
## Rule 10/1: [15 cases, mean 38.629, range 37.39 to 40.66, est err 1.492]
##
## if
## BiologicalMaterial12 <= 19.75
## ManufacturingProcess13 <= 34.7
## then
## outcome = 67.109 - 2.06 BiologicalMaterial12
## + 1.44 ManufacturingProcess10 - 0.16 ManufacturingProcess13
## + 0.0013 ManufacturingProcess15 + 0.11 BiologicalMaterial09
## - 0.007 ManufacturingProcess04 - 0.004 ManufacturingProcess24
##
## Rule 10/2: [64 cases, mean 39.687, range 35.25 to 43.88, est err 1.026]
##
## if
## ManufacturingProcess13 > 34.7
## then
## outcome = 234.278 - 0.0347 ManufacturingProcess27
## - 0.98 ManufacturingProcess31 + 0.219 ManufacturingProcess32
##
## Rule 10/3: [77 cases, mean 40.786, range 37.39 to 46.34, est err 1.077]
##
## if
## ManufacturingProcess13 <= 34.7
## then
## outcome = 76.232 - 1.79 ManufacturingProcess13
## + 0.0112 ManufacturingProcess15 + 0.59 ManufacturingProcess10
## - 0.063 ManufacturingProcess04 + 0.92 BiologicalMaterial09
## - 0.034 ManufacturingProcess24 + 0.006 ManufacturingProcess32
## + 0.04 ManufacturingProcess29 - 0.0005 ManufacturingProcess27
##
## Model 11:
##
## Rule 11/1: [141 cases, mean 40.287, range 35.25 to 46.34, est err 0.993]
##
## outcome = 20.841 - 0.79 ManufacturingProcess17
## + 0.148 ManufacturingProcess32 + 1.13 ManufacturingProcess29
##
## Model 12:
##
## Rule 12/1: [64 cases, mean 39.687, range 35.25 to 43.88, est err 1.038]
##
## if
## ManufacturingProcess13 > 34.7
## then
## outcome = -87.388 + 1 ManufacturingProcess09
## + 0.286 ManufacturingProcess32 + 1.01 ManufacturingProcess13
## + 0.092 ManufacturingProcess02
##
## Rule 12/2: [77 cases, mean 40.786, range 37.39 to 46.34, est err 1.045]
##
## if
## ManufacturingProcess13 <= 34.7
## then
## outcome = 31.181 - 2.11 ManufacturingProcess13
## + 0.153 BiologicalMaterial11 + 0.0093 ManufacturingProcess26
## + 0.015 BiologicalMaterial03 + 0.03 ManufacturingProcess09
##
## Model 13:
##
## Rule 13/1: [141 cases, mean 40.287, range 35.25 to 46.34, est err 1.053]
##
## outcome = -62.69 + 0.56 ManufacturingProcess09
## + 0.159 ManufacturingProcess32 - 1.33 ManufacturingProcess37
## + 0.0088 ManufacturingProcess15
##
## Model 14:
##
## Rule 14/1: [64 cases, mean 39.687, range 35.25 to 43.88, est err 0.837]
##
## if
## ManufacturingProcess13 > 34.7
## then
## outcome = 7.401 + 1.84 ManufacturingProcess29
## - 0.81 ManufacturingProcess17 - 0.0187 ManufacturingProcess27
## + 0.173 ManufacturingProcess32 + 0.116 ManufacturingProcess04
## - 0.83 ManufacturingProcess11 + 0.76 ManufacturingProcess10
## + 0.178 BiologicalMaterial05 - 0.0058 ManufacturingProcess18
##
## Rule 14/2: [55 cases, mean 40.098, range 37.39 to 43.12, est err 1.154]
##
## if
## BiologicalMaterial11 <= 150.06
## ManufacturingProcess13 <= 34.7
## then
## outcome = -29.775 + 1.036 ManufacturingProcess01
## - 1.76 ManufacturingProcess13 + 0.197 BiologicalMaterial11
## + 0.216 ManufacturingProcess06 + 0.0096 ManufacturingProcess27
## + 0.35 ManufacturingProcess43
##
## Rule 14/3: [22 cases, mean 42.505, range 38.6 to 46.34, est err 1.853]
##
## if
## BiologicalMaterial11 > 150.06
## ManufacturingProcess13 <= 34.7
## then
## outcome = 110.697 - 2.9 ManufacturingProcess13
## + 0.061 BiologicalMaterial11 + 0.0046 ManufacturingProcess27
##
## Model 15:
##
## Rule 15/1: [141 cases, mean 40.287, range 35.25 to 46.34, est err 1.111]
##
## outcome = -69.056 - 0.394 BiologicalMaterial06
## + 0.367 BiologicalMaterial02 + 0.268 ManufacturingProcess32
## + 0.74 ManufacturingProcess09 - 0.275 ManufacturingProcess33
## + 0.0104 ManufacturingProcess14 - 1.41 ManufacturingProcess37
##
## Model 16:
##
## Rule 16/1: [64 cases, mean 39.687, range 35.25 to 43.88, est err 1.333]
##
## if
## ManufacturingProcess13 > 34.7
## then
## outcome = 73.521 - 0.95 ManufacturingProcess13
##
## Rule 16/2: [77 cases, mean 40.786, range 37.39 to 46.34, est err 1.580]
##
## if
## ManufacturingProcess13 <= 34.7
## then
## outcome = 85.835 + 1.316 ManufacturingProcess01
## - 1.94 ManufacturingProcess13 - 0.082 ManufacturingProcess02
## - 0.063 ManufacturingProcess04 + 0.0088 ManufacturingProcess26
## + 0.93 BiologicalMaterial09
##
## Rule 16/3: [22 cases, mean 42.505, range 38.6 to 46.34, est err 1.799]
##
## if
## BiologicalMaterial11 > 150.06
## ManufacturingProcess13 <= 34.7
## then
## outcome = 74.251 - 2.65 ManufacturingProcess13
## + 0.077 BiologicalMaterial11 + 0.0077 ManufacturingProcess26
##
## Model 17:
##
## Rule 17/1: [141 cases, mean 40.287, range 35.25 to 46.34, est err 1.216]
##
## outcome = -82.14 + 1.11 ManufacturingProcess09
## + 0.323 ManufacturingProcess32 + 0.59 ManufacturingProcess13
##
## Model 18:
##
## Rule 18/1: [41 cases, mean 39.909, range 35.25 to 43.44, est err 1.061]
##
## if
## ManufacturingProcess17 > 34.9
## then
## outcome = -5.342 - 0.9 ManufacturingProcess17
## + 1.56 ManufacturingProcess29 - 0.7 ManufacturingProcess13
## - 0.43 ManufacturingProcess09 + 0.32 ManufacturingProcess31
## + 0.066 ManufacturingProcess04 + 0.081 BiologicalMaterial03
##
## Rule 18/2: [14 cases, mean 40.058, range 37.64 to 42.44, est err 2.489]
##
## if
## ManufacturingProcess01 <= 10.4
## ManufacturingProcess17 <= 34.9
## then
## outcome = 34.294 - 0.196 ManufacturingProcess01
## - 0.14 ManufacturingProcess17 - 0.14 ManufacturingProcess13
## - 0.015 ManufacturingProcess02 - 0.03 BiologicalMaterial06
## + 0.016 ManufacturingProcess04 - 0.13 ManufacturingProcess30
## + 0.1 BiologicalMaterial08 + 0.034 BiologicalMaterial05
## - 0.13 ManufacturingProcess37 + 0.013 BiologicalMaterial03
##
## Rule 18/3: [100 cases, mean 40.442, range 36.77 to 46.34, est err 1.255]
##
## if
## ManufacturingProcess17 <= 34.9
## then
## outcome = 23.193 - 1.38 ManufacturingProcess17
## - 1.09 ManufacturingProcess13 - 0.098 ManufacturingProcess02
## - 0.189 BiologicalMaterial06 + 0.102 ManufacturingProcess04
## - 0.84 ManufacturingProcess30 - 1.26 ManufacturingProcess37
## + 0.63 BiologicalMaterial08 + 0.213 BiologicalMaterial05
## + 0.085 BiologicalMaterial03 + 0.038 BiologicalMaterial11
##
## Model 19:
##
## Rule 19/1: [141 cases, mean 40.287, range 35.25 to 46.34, est err 1.125]
##
## outcome = -42.94 + 0.455 ManufacturingProcess32
## + 0.74 ManufacturingProcess09 - 0.361 ManufacturingProcess33
##
## Model 20:
##
## Rule 20/1: [11 cases, mean 39.187, range 37.39 to 42.23, est err 2.580]
##
## if
## BiologicalMaterial01 <= 5.6
## ManufacturingProcess17 <= 34.9
## then
## outcome = -29.909 + 12.31 BiologicalMaterial01
## - 0.19 ManufacturingProcess17 - 0.016 ManufacturingProcess02
## - 0.11 ManufacturingProcess13 + 0.015 ManufacturingProcess04
##
## Rule 20/2: [41 cases, mean 39.909, range 35.25 to 43.44, est err 0.902]
##
## if
## ManufacturingProcess17 > 34.9
## then
## outcome = -35.594 - 1 ManufacturingProcess17
## + 0.0185 ManufacturingProcess26 - 0.04 ManufacturingProcess13
## + 0.006 BiologicalMaterial03
##
## Rule 20/3: [14 cases, mean 40.058, range 37.64 to 42.44, est err 2.642]
##
## if
## ManufacturingProcess01 <= 10.4
## ManufacturingProcess17 <= 34.9
## then
## outcome = 17.92 + 0.99 BiologicalMaterial12
##
## Rule 20/4: [89 cases, mean 40.598, range 36.77 to 46.34, est err 1.566]
##
## if
## BiologicalMaterial01 > 5.6
## ManufacturingProcess17 <= 34.9
## then
## outcome = 0.043 - 2.07 ManufacturingProcess17
## - 0.814 ManufacturingProcess01 - 0.093 ManufacturingProcess02
## + 0.98 BiologicalMaterial12 + 0.116 ManufacturingProcess04
## - 0.18 ManufacturingProcess13
##
## Rule 20/5: [6 cases, mean 40.773, range 39.08 to 42.73, est err 3.936]
##
## if
## BiologicalMaterial12 <= 19.1
## ManufacturingProcess17 <= 34.9
## then
## outcome = -45.327 + 4.41 BiologicalMaterial12
## - 0.16 ManufacturingProcess17 - 0.012 ManufacturingProcess02
## + 0.014 ManufacturingProcess04 - 0.07 ManufacturingProcess13
##
##
## Evaluation on training data (141 cases):
##
## Average |error| 0.603
## Relative |error| 0.40
## Correlation coefficient 0.90
##
##
## Attribute usage:
## Conds Model
##
## 39% 52% ManufacturingProcess13
## 19% 30% BiologicalMaterial06
## 18% 36% ManufacturingProcess17
## 6% 16% BiologicalMaterial05
## 5% 23% BiologicalMaterial11
## 3% 15% ManufacturingProcess27
## 3% BiologicalMaterial01
## 9% ManufacturingProcess01
## 6% BiologicalMaterial10
## 4% BiologicalMaterial12
## 34% ManufacturingProcess09
## 76% ManufacturingProcess32
## 36% ManufacturingProcess29
## 32% ManufacturingProcess04
## 29% ManufacturingProcess33
## 28% BiologicalMaterial03
## 20% ManufacturingProcess37
## 17% ManufacturingProcess14
## 15% ManufacturingProcess10
## 15% BiologicalMaterial02
## 15% ManufacturingProcess28
## 14% BiologicalMaterial08
## 13% ManufacturingProcess26
## 12% ManufacturingProcess25
## 12% ManufacturingProcess15
## 12% ManufacturingProcess20
## 11% ManufacturingProcess02
## 8% ManufacturingProcess24
## 8% ManufacturingProcess31
## 8% ManufacturingProcess11
## 7% BiologicalMaterial09
## 5% BiologicalMaterial04
## 4% ManufacturingProcess39
## 4% ManufacturingProcess30
## 2% ManufacturingProcess18
## 2% ManufacturingProcess06
## 2% ManufacturingProcess43
##
##
## Time: 0.6 secs