library(AppliedPredictiveModeling)
library(psych)
library(reshape2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::%+%()   masks psych::%+%()
## x ggplot2::alpha() masks psych::alpha()
## x dplyr::filter()  masks stats::filter()
## x dplyr::lag()     masks stats::lag()
library(mice)
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(caTools)
library(elasticnet)
## Loading required package: lars
## Loaded lars 1.2
## 
## Attaching package: 'lars'
## The following object is masked from 'package:psych':
## 
##     error.bars
library(pls)
## 
## Attaching package: 'pls'
## The following object is masked from 'package:caret':
## 
##     R2
## The following object is masked from 'package:stats':
## 
##     loadings
library(mlbench)
library(earth)
## Loading required package: Formula
## Loading required package: plotmo
## Loading required package: plotrix
## 
## Attaching package: 'plotrix'
## The following object is masked from 'package:psych':
## 
##     rescale
## Loading required package: TeachingDemos
library(kernlab)
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:purrr':
## 
##     cross
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:psych':
## 
##     alpha
library(nnet)

Exercise 7.2

7.2 Friedman (1991) introduced several benchmark data sets created by simulation. One of these simulations used the following nonlinear equation to create data:

\[ y = 10\sin(\pi x_{1} x_{2}) + 20(x_{3} - 0.5)^{2} + 10x_{4} + 5x_{5} + N(0, \sigma^{2}) \]

where the x values are random variables uniformly distributed between [0,1] (there are also 5 other non-informative variables created in the simulation). The package mlbench contains a function called mlbench.friedman1 that simulates these data:

library(mlbench)

# Simulate the Friedman 1 training set: 200 samples with noise sd = 1.
# The seed is fixed so the draw is reproducible.
set.seed(200)
trainingData <- mlbench.friedman1(n = 200, sd = 1)

# We convert the 'x' data from a matrix to a data frame.
# One reason is that this will give the columns names.
trainingData[["x"]] <- data.frame(trainingData[["x"]])

# Show the first rows of the predictors.
head(trainingData[["x"]])
##          X1        X2         X3         X4         X5         X6        X7
## 1 0.5337724 0.6478064 0.85078526 0.18159957 0.92903976 0.36179060 0.8266609
## 2 0.5837650 0.4381528 0.67272659 0.66924914 0.16379784 0.45305931 0.6489601
## 3 0.5895783 0.5879065 0.40967108 0.33812728 0.89409334 0.02681911 0.1785614
## 4 0.6910399 0.2259548 0.03335447 0.06691274 0.63744519 0.52500637 0.5133614
## 5 0.6673315 0.8188985 0.71676079 0.80324287 0.08306864 0.22344157 0.6644906
## 6 0.8392937 0.3862983 0.64618857 0.86105431 0.63038947 0.43703891 0.3360117
##          X8         X9       X10
## 1 0.4214081 0.59111440 0.5886216
## 2 0.8446239 0.92819306 0.7584008
## 3 0.3495908 0.01759542 0.4441185
## 4 0.7970260 0.68986918 0.4450716
## 5 0.9038919 0.39696995 0.5500808
## 6 0.6489177 0.53116033 0.9066182
# Look at the data using featurePlot: each predictor against the response.
featurePlot(x = trainingData$x, y = trainingData$y)

# mlbench.friedman1() creates a list with a vector 'y' and a matrix of
# predictors 'x'. Also simulate a large test set (5000 samples) to estimate
# the true error rate with good precision. No seed is set immediately before
# this call.
testData <- mlbench.friedman1(n = 5000, sd = 1)
testData[["x"]] <- data.frame(testData[["x"]])

Tune several models on these data. For example:

library(caret)

# k-nearest neighbours tuned over 10 candidate values of k. Predictors are
# centred and scaled inside train(), so no single variable dominates the
# distance calculation.
set.seed(200)
knnModel <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Print the resampling results and the selected k.
knnModel
## k-Nearest Neighbors 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  3.654912  0.4779838  2.958475
##    7  3.529432  0.5118581  2.861742
##    9  3.446330  0.5425096  2.780756
##   11  3.378049  0.5723793  2.719410
##   13  3.332339  0.5953773  2.692863
##   15  3.309235  0.6111389  2.663046
##   17  3.317408  0.6201421  2.678898
##   19  3.311667  0.6333800  2.682098
##   21  3.316340  0.6407537  2.688887
##   23  3.326040  0.6491480  2.705915
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 15.
# Predict the simulated test set with the tuned KNN model, then plot the
# resampling profile of the fitted train object.
knnPred <- predict(knnModel, newdata = testData[["x"]])
plot(knnModel)

# The function 'postResample' can be used to get the test set performance
# values (RMSE, R-squared and MAE).
postResample(pred = knnPred, obs = testData[["y"]])
##      RMSE  Rsquared       MAE 
## 3.1750657 0.6785946 2.5443169

DISCUSSION

k = 15 gave the lowest resampled RMSE on the training data (3.309235); on the test set the KNN RMSE is 3.175.

Which models appear to get the best performance? Does the MARS select the informative predictors?

# MARS
# Tune over additive (degree = 1) and pairwise-interaction (degree = 2) models
# that retain between 2 and 15 terms (28 candidates in total).
# The candidates are given explicitly through tuneGrid. train() only uses
# tuneLength to build a grid when tuneGrid is missing, so the previous
# `tuneLength = 10` had no effect and has been removed to avoid implying
# that a 10-point grid was searched.
set.seed(200)
Mgrid <- expand.grid(.degree = 1:2, .nprune = 2:15)
MARSmodel <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "earth",
  tuneGrid = Mgrid,
  preProcess = c("center", "scale")
)

# Print the resampling results and the selected degree / nprune.
MARSmodel
## Multivariate Adaptive Regression Spline 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ... 
## Resampling results across tuning parameters:
## 
##   degree  nprune  RMSE      Rsquared   MAE     
##   1        2      4.447045  0.2249607  3.650128
##   1        3      3.744821  0.4546610  3.019175
##   1        4      2.828643  0.6892908  2.244131
##   1        5      2.524326  0.7516356  2.027435
##   1        6      2.406670  0.7747079  1.906733
##   1        7      2.027113  0.8375721  1.594956
##   1        8      1.874633  0.8618476  1.474219
##   1        9      1.800794  0.8728377  1.411703
##   1       10      1.810047  0.8721377  1.412023
##   1       11      1.821314  0.8714221  1.427124
##   1       12      1.831608  0.8700790  1.430044
##   1       13      1.839717  0.8686550  1.440537
##   1       14      1.849381  0.8672327  1.450876
##   1       15      1.856211  0.8663787  1.452430
##   2        2      4.447780  0.2248695  3.650597
##   2        3      3.737891  0.4543357  3.018103
##   2        4      2.854288  0.6832049  2.259488
##   2        5      2.513582  0.7550084  2.004730
##   2        6      2.387478  0.7799585  1.889787
##   2        7      2.044028  0.8354683  1.615415
##   2        8      1.910896  0.8568917  1.500375
##   2        9      1.810765  0.8703004  1.404288
##   2       10      1.677078  0.8885385  1.321634
##   2       11      1.561012  0.9045745  1.234778
##   2       12      1.503867  0.9112625  1.183593
##   2       13      1.507992  0.9112557  1.172444
##   2       14      1.505298  0.9114749  1.171595
##   2       15      1.527789  0.9091635  1.188885
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 12 and degree = 2.
# Plot the resampling results of the tuned MARS model.
plot(MARSmodel)

## DISCUSSION: Training - The final values used for the model were nprune = 12 and degree = 2, with a resampled RMSE of 1.503867 (1.831608 is the value for degree = 1, nprune = 12).

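Does MARS select the informative predictors? Yes: varImp() lists only the five informative predictors X1–X5 (X3 with an importance of 0.00), and none of the non-informative predictors X6–X10 appear.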

# Variable importance for the final MARS model.
varImp(MARSmodel)
## earth variable importance
## 
##    Overall
## X1  100.00
## X4   75.40
## X2   49.00
## X5   15.72
## X3    0.00

using the test data..

# Score the tuned MARS model on the simulated test set.
marspred <- predict(MARSmodel, newdata = testData[["x"]])
postResample(pred = marspred, obs = testData[["y"]])
##      RMSE  Rsquared       MAE 
## 1.3227340 0.9291489 1.0524686

MARS test RMSE = 1.322, compared with a resampled training RMSE of 1.503867.

SVM

# Radial-basis-function SVM tuned over 10 cost values. Resampling here is
# cross-validation (trainControl(method = "cv")), so its resampled RMSE comes
# from a different scheme than train()'s default used for KNN and MARS.
set.seed(200)
SVMmodel <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "svmRadial",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = trainControl(method = "cv")
)

# Print the resampling results and the selected sigma / C.
SVMmodel
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ... 
## Resampling results across tuning parameters:
## 
##   C       RMSE      Rsquared   MAE     
##     0.25  2.525164  0.7810576  2.010680
##     0.50  2.270567  0.7944850  1.794902
##     1.00  2.099356  0.8155574  1.659376
##     2.00  2.005858  0.8302852  1.578799
##     4.00  1.934650  0.8435677  1.528373
##     8.00  1.915665  0.8475605  1.528648
##    16.00  1.923914  0.8463074  1.535991
##    32.00  1.923914  0.8463074  1.535991
##    64.00  1.923914  0.8463074  1.535991
##   128.00  1.923914  0.8463074  1.535991
## 
## Tuning parameter 'sigma' was held constant at a value of 0.06299324
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.06299324 and C = 8.
# Plot the resampling results of the tuned SVM model.
plot(SVMmodel)

## DISCUSSION: The final values used for the model were sigma = 0.06299324 and C = 8. RMSE=1.915

So run test data…….

# Score the tuned SVM on the simulated test set.
SVMpred <- predict(SVMmodel, newdata = testData[["x"]])
postResample(pred = SVMpred, obs = testData[["y"]])
##      RMSE  Rsquared       MAE 
## 2.0541197 0.8290353 1.5586411

test data RMSE=2.05

## Run a neural network model.

# Single-hidden-layer neural network fitted directly with nnet() (no train()
# resampling): 5 hidden units, weight decay 0.01, linear output unit for
# regression, at most 500 iterations.
# MaxNWts is set to the exact number of weights in the network:
#   5 hidden units * (number of inputs + 1 bias) + 5 hidden-to-output + 1 bias.
set.seed(200)
nnetmodel <- nnet(
  x = trainingData$x,
  y = trainingData$y,
  size = 5,
  decay = 0.01,
  linout = TRUE,
  trace = FALSE,
  maxit = 500,
  MaxNWts = 5 * (ncol(trainingData$x) + 1) + 5 + 1
)

# Print the network architecture.
nnetmodel
## a 10-5-1 network with 61 weights
## options were - linear output units  decay=0.01
# List the fitted weights of the network, grouped by destination unit.
summary(nnetmodel)
## a 10-5-1 network with 61 weights
## options were - linear output units  decay=0.01
##   b->h1  i1->h1  i2->h1  i3->h1  i4->h1  i5->h1  i6->h1  i7->h1  i8->h1  i9->h1 
##    5.22   -3.79   -0.54  -19.57    8.95   -2.17   -0.65  -12.82   -4.60    2.98 
## i10->h1 
##    0.57 
##   b->h2  i1->h2  i2->h2  i3->h2  i4->h2  i5->h2  i6->h2  i7->h2  i8->h2  i9->h2 
##   -2.22    1.19    1.56   -1.11    0.97    1.45   -0.61    0.88   -0.16    0.48 
## i10->h2 
##   -0.49 
##   b->h3  i1->h3  i2->h3  i3->h3  i4->h3  i5->h3  i6->h3  i7->h3  i8->h3  i9->h3 
##   -7.16   16.91    6.31    2.79    7.63   -1.50    2.06   -6.56    7.40  -11.59 
## i10->h3 
##    2.73 
##   b->h4  i1->h4  i2->h4  i3->h4  i4->h4  i5->h4  i6->h4  i7->h4  i8->h4  i9->h4 
##  -18.14   -1.87   -2.41   17.38    5.84    4.31    2.78   -1.44   -3.03   -0.05 
## i10->h4 
##    4.10 
##   b->h5  i1->h5  i2->h5  i3->h5  i4->h5  i5->h5  i6->h5  i7->h5  i8->h5  i9->h5 
##  -13.30    0.34    1.96   16.28   12.50  -14.89   -3.94   -3.25    3.36    0.97 
## i10->h5 
##   -2.89 
##   b->o  h1->o  h2->o  h3->o  h4->o  h5->o 
##  -0.22   6.51  19.32   4.76   5.44   4.44

## DISCUSSION: Results:

a 10-5-1 network with 61 weights

Let’s look at variable importance…

# Variable importance for the neural network (all ten predictors are listed).
varImp(nnetmodel)
##       Overall
## X1  10.152501
## X2   7.395536
## X3  23.690690
## X4  14.521013
## X5  11.405236
## X6   4.833571
## X7  10.246607
## X8   6.754084
## X9   6.013435
## X10  4.987327

Let’s look at new samples…..

# Score the neural network on the simulated test set.
nnetpred <- predict(nnetmodel, newdata = testData[["x"]])
postResample(pred = nnetpred, obs = testData[["y"]])
##      RMSE  Rsquared       MAE 
## 2.8134222 0.7083039 2.1815463

## DISCUSSION:

The neural network RMSE =2.813 on test data

## Conclusion:

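The MARS model is the best fitting model for this dataset: its test RMSE of 1.322 is the lowest of the four models (SVM 2.054, neural network 2.813, KNN 3.175).

MARS test RMSE = 1.322, compared with a resampled training RMSE of 1.503867.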

# Exercise 7.5: Exercise 6.3 describes data for a chemical manufacturing process. Use the same data imputation, data splitting, and pre-processing steps as before and train several nonlinear regression models.

Impute, datasplit and preprocess…..

# Load the chemical manufacturing process data and check its dimensions
# (rows, columns).
library(AppliedPredictiveModeling)
data(ChemicalManufacturingProcess)
dim(ChemicalManufacturingProcess)
## [1] 176  58
# Inspect the column types and first values of every variable.
str(ChemicalManufacturingProcess)
## 'data.frame':    176 obs. of  58 variables:
##  $ Yield                 : num  38 42.4 42 41.4 42.5 ...
##  $ BiologicalMaterial01  : num  6.25 8.01 8.01 8.01 7.47 6.12 7.48 6.94 6.94 6.94 ...
##  $ BiologicalMaterial02  : num  49.6 61 61 61 63.3 ...
##  $ BiologicalMaterial03  : num  57 67.5 67.5 67.5 72.2 ...
##  $ BiologicalMaterial04  : num  12.7 14.6 14.6 14.6 14 ...
##  $ BiologicalMaterial05  : num  19.5 19.4 19.4 19.4 17.9 ...
##  $ BiologicalMaterial06  : num  43.7 53.1 53.1 53.1 54.7 ...
##  $ BiologicalMaterial07  : num  100 100 100 100 100 100 100 100 100 100 ...
##  $ BiologicalMaterial08  : num  16.7 19 19 19 18.2 ...
##  $ BiologicalMaterial09  : num  11.4 12.6 12.6 12.6 12.8 ...
##  $ BiologicalMaterial10  : num  3.46 3.46 3.46 3.46 3.05 3.78 3.04 3.85 3.85 3.85 ...
##  $ BiologicalMaterial11  : num  138 154 154 154 148 ...
##  $ BiologicalMaterial12  : num  18.8 21.1 21.1 21.1 21.1 ...
##  $ ManufacturingProcess01: num  NA 0 0 0 10.7 12 11.5 12 12 12 ...
##  $ ManufacturingProcess02: num  NA 0 0 0 0 0 0 0 0 0 ...
##  $ ManufacturingProcess03: num  NA NA NA NA NA NA 1.56 1.55 1.56 1.55 ...
##  $ ManufacturingProcess04: num  NA 917 912 911 918 924 933 929 928 938 ...
##  $ ManufacturingProcess05: num  NA 1032 1004 1015 1028 ...
##  $ ManufacturingProcess06: num  NA 210 207 213 206 ...
##  $ ManufacturingProcess07: num  NA 177 178 177 178 178 177 178 177 177 ...
##  $ ManufacturingProcess08: num  NA 178 178 177 178 178 178 178 177 177 ...
##  $ ManufacturingProcess09: num  43 46.6 45.1 44.9 45 ...
##  $ ManufacturingProcess10: num  NA NA NA NA NA NA 11.6 10.2 9.7 10.1 ...
##  $ ManufacturingProcess11: num  NA NA NA NA NA NA 11.5 11.3 11.1 10.2 ...
##  $ ManufacturingProcess12: num  NA 0 0 0 0 0 0 0 0 0 ...
##  $ ManufacturingProcess13: num  35.5 34 34.8 34.8 34.6 34 32.4 33.6 33.9 34.3 ...
##  $ ManufacturingProcess14: num  4898 4869 4878 4897 4992 ...
##  $ ManufacturingProcess15: num  6108 6095 6087 6102 6233 ...
##  $ ManufacturingProcess16: num  4682 4617 4617 4635 4733 ...
##  $ ManufacturingProcess17: num  35.5 34 34.8 34.8 33.9 33.4 33.8 33.6 33.9 35.3 ...
##  $ ManufacturingProcess18: num  4865 4867 4877 4872 4886 ...
##  $ ManufacturingProcess19: num  6049 6097 6078 6073 6102 ...
##  $ ManufacturingProcess20: num  4665 4621 4621 4611 4659 ...
##  $ ManufacturingProcess21: num  0 0 0 0 -0.7 -0.6 1.4 0 0 1 ...
##  $ ManufacturingProcess22: num  NA 3 4 5 8 9 1 2 3 4 ...
##  $ ManufacturingProcess23: num  NA 0 1 2 4 1 1 2 3 1 ...
##  $ ManufacturingProcess24: num  NA 3 4 5 18 1 1 2 3 4 ...
##  $ ManufacturingProcess25: num  4873 4869 4897 4892 4930 ...
##  $ ManufacturingProcess26: num  6074 6107 6116 6111 6151 ...
##  $ ManufacturingProcess27: num  4685 4630 4637 4630 4684 ...
##  $ ManufacturingProcess28: num  10.7 11.2 11.1 11.1 11.3 11.4 11.2 11.1 11.3 11.4 ...
##  $ ManufacturingProcess29: num  21 21.4 21.3 21.3 21.6 21.7 21.2 21.2 21.5 21.7 ...
##  $ ManufacturingProcess30: num  9.9 9.9 9.4 9.4 9 10.1 11.2 10.9 10.5 9.8 ...
##  $ ManufacturingProcess31: num  69.1 68.7 69.3 69.3 69.4 68.2 67.6 67.9 68 68.5 ...
##  $ ManufacturingProcess32: num  156 169 173 171 171 173 159 161 160 164 ...
##  $ ManufacturingProcess33: num  66 66 66 68 70 70 65 65 65 66 ...
##  $ ManufacturingProcess34: num  2.4 2.6 2.6 2.5 2.5 2.5 2.5 2.5 2.5 2.5 ...
##  $ ManufacturingProcess35: num  486 508 509 496 468 490 475 478 491 488 ...
##  $ ManufacturingProcess36: num  0.019 0.019 0.018 0.018 0.017 0.018 0.019 0.019 0.019 0.019 ...
##  $ ManufacturingProcess37: num  0.5 2 0.7 1.2 0.2 0.4 0.8 1 1.2 1.8 ...
##  $ ManufacturingProcess38: num  3 2 2 2 2 2 2 2 3 3 ...
##  $ ManufacturingProcess39: num  7.2 7.2 7.2 7.2 7.3 7.2 7.3 7.3 7.4 7.1 ...
##  $ ManufacturingProcess40: num  NA 0.1 0 0 0 0 0 0 0 0 ...
##  $ ManufacturingProcess41: num  NA 0.15 0 0 0 0 0 0 0 0 ...
##  $ ManufacturingProcess42: num  11.6 11.1 12 10.6 11 11.5 11.7 11.4 11.4 11.3 ...
##  $ ManufacturingProcess43: num  3 0.9 1 1.1 1.1 2.2 0.7 0.8 0.9 0.8 ...
##  $ ManufacturingProcess44: num  1.8 1.9 1.8 1.8 1.7 1.8 2 2 1.9 1.9 ...
##  $ ManufacturingProcess45: num  2.4 2.2 2.3 2.1 2.1 2 2.2 2.2 2.1 2.4 ...
# Tabulate the missing-data patterns (1 = observed, 0 = missing).
md.pattern(ChemicalManufacturingProcess)

##     Yield BiologicalMaterial01 BiologicalMaterial02 BiologicalMaterial03
## 152     1                    1                    1                    1
## 6       1                    1                    1                    1
## 1       1                    1                    1                    1
## 7       1                    1                    1                    1
## 5       1                    1                    1                    1
## 2       1                    1                    1                    1
## 1       1                    1                    1                    1
## 1       1                    1                    1                    1
## 1       1                    1                    1                    1
##         0                    0                    0                    0
##     BiologicalMaterial04 BiologicalMaterial05 BiologicalMaterial06
## 152                    1                    1                    1
## 6                      1                    1                    1
## 1                      1                    1                    1
## 7                      1                    1                    1
## 5                      1                    1                    1
## 2                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
##                        0                    0                    0
##     BiologicalMaterial07 BiologicalMaterial08 BiologicalMaterial09
## 152                    1                    1                    1
## 6                      1                    1                    1
## 1                      1                    1                    1
## 7                      1                    1                    1
## 5                      1                    1                    1
## 2                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
##                        0                    0                    0
##     BiologicalMaterial10 BiologicalMaterial11 BiologicalMaterial12
## 152                    1                    1                    1
## 6                      1                    1                    1
## 1                      1                    1                    1
## 7                      1                    1                    1
## 5                      1                    1                    1
## 2                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
##                        0                    0                    0
##     ManufacturingProcess09 ManufacturingProcess13 ManufacturingProcess15
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess16 ManufacturingProcess17 ManufacturingProcess18
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess19 ManufacturingProcess20 ManufacturingProcess21
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess32 ManufacturingProcess37 ManufacturingProcess38
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess39 ManufacturingProcess42 ManufacturingProcess43
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess44 ManufacturingProcess45 ManufacturingProcess01
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      0
##                          0                      0                      1
##     ManufacturingProcess04 ManufacturingProcess05 ManufacturingProcess07
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        0                      0                      0
##                          1                      1                      1
##     ManufacturingProcess08 ManufacturingProcess12 ManufacturingProcess14
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      0
## 1                        0                      0                      1
##                          1                      1                      1
##     ManufacturingProcess22 ManufacturingProcess23 ManufacturingProcess24
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        0                      0                      0
##                          1                      1                      1
##     ManufacturingProcess40 ManufacturingProcess41 ManufacturingProcess06
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      0
## 1                        1                      1                      1
## 1                        0                      0                      0
##                          1                      1                      2
##     ManufacturingProcess02 ManufacturingProcess25 ManufacturingProcess26
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      0                      0
## 2                        0                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        0                      1                      1
##                          3                      5                      5
##     ManufacturingProcess27 ManufacturingProcess28 ManufacturingProcess29
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        0                      0                      0
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          5                      5                      5
##     ManufacturingProcess30 ManufacturingProcess31 ManufacturingProcess33
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        0                      0                      0
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          5                      5                      5
##     ManufacturingProcess34 ManufacturingProcess35 ManufacturingProcess36
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        0                      0                      0
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          5                      5                      5
##     ManufacturingProcess10 ManufacturingProcess11 ManufacturingProcess03    
## 152                      1                      1                      1   0
## 6                        1                      1                      0   1
## 1                        1                      0                      1   1
## 7                        0                      0                      0   3
## 5                        1                      1                      1  11
## 2                        1                      1                      1   1
## 1                        1                      1                      1   1
## 1                        0                      0                      0   4
## 1                        0                      0                      0  16
##                          9                     10                     15 106
# Number of missing values in each column of the data set.
countNA <- colSums(is.na(ChemicalManufacturingProcess))
countNA
##                  Yield   BiologicalMaterial01   BiologicalMaterial02 
##                      0                      0                      0 
##   BiologicalMaterial03   BiologicalMaterial04   BiologicalMaterial05 
##                      0                      0                      0 
##   BiologicalMaterial06   BiologicalMaterial07   BiologicalMaterial08 
##                      0                      0                      0 
##   BiologicalMaterial09   BiologicalMaterial10   BiologicalMaterial11 
##                      0                      0                      0 
##   BiologicalMaterial12 ManufacturingProcess01 ManufacturingProcess02 
##                      0                      1                      3 
## ManufacturingProcess03 ManufacturingProcess04 ManufacturingProcess05 
##                     15                      1                      1 
## ManufacturingProcess06 ManufacturingProcess07 ManufacturingProcess08 
##                      2                      1                      1 
## ManufacturingProcess09 ManufacturingProcess10 ManufacturingProcess11 
##                      0                      9                     10 
## ManufacturingProcess12 ManufacturingProcess13 ManufacturingProcess14 
##                      1                      0                      1 
## ManufacturingProcess15 ManufacturingProcess16 ManufacturingProcess17 
##                      0                      0                      0 
## ManufacturingProcess18 ManufacturingProcess19 ManufacturingProcess20 
##                      0                      0                      0 
## ManufacturingProcess21 ManufacturingProcess22 ManufacturingProcess23 
##                      0                      1                      1 
## ManufacturingProcess24 ManufacturingProcess25 ManufacturingProcess26 
##                      1                      5                      5 
## ManufacturingProcess27 ManufacturingProcess28 ManufacturingProcess29 
##                      5                      5                      5 
## ManufacturingProcess30 ManufacturingProcess31 ManufacturingProcess32 
##                      5                      5                      0 
## ManufacturingProcess33 ManufacturingProcess34 ManufacturingProcess35 
##                      5                      5                      5 
## ManufacturingProcess36 ManufacturingProcess37 ManufacturingProcess38 
##                      5                      0                      0 
## ManufacturingProcess39 ManufacturingProcess40 ManufacturingProcess41 
##                      0                      1                      1 
## ManufacturingProcess42 ManufacturingProcess43 ManufacturingProcess44 
##                      0                      0                      0 
## ManufacturingProcess45 
##                      0
# Predictor part of the dataset: everything except the first column.
pred <- ChemicalManufacturingProcess[, -1]

# Impute missing predictor values with k-nearest neighbours.
# NOTE(review): caret's "knnImpute" is documented to center and scale the
# data as part of the imputation, so the explicit center/scale pass below
# is presumably redundant -- confirm before removing it.
Imp_pred <- preProcess(x = pred, method = "knnImpute")

# Apply the fitted imputation to the predictors.
pred1 <- predict(object = Imp_pred, newdata = pred)

# Center and scale the imputed predictors.
pred2 <- preProcess(x = pred1, method = c("center", "scale"))
pred3 <- predict(object = pred2, newdata = pred1)

Split the data 75% / 25% into training and test sets

# Hold out 25% of the samples as a test set. The partition is drawn on
# Yield, with a fixed seed so that the split is reproducible.
set.seed(200)
trainingRows <- createDataPartition(
  y = ChemicalManufacturingProcess$Yield,
  p = 0.75,
  list = FALSE
)

# Processed predictors and observed yield for the training rows ...
train_X2 <- pred3[trainingRows, ]
train_Y2 <- ChemicalManufacturingProcess$Yield[trainingRows]

# ... and for the held-out rows.
test_X2 <- pred3[-trainingRows, ]
test_Y2 <- ChemicalManufacturingProcess$Yield[-trainingRows]

## (a) Which nonlinear regression model gives the optimal resampling and test set performance?

# Tune k-nearest neighbours over 10 candidate values of k using train()'s
# default resampling. Predictors are centered and scaled inside train().
set.seed(200)
knnModel2 <- train(
  x = train_X2,
  y = train_Y2,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
# Print the resampling results for each candidate k and the selected value.
knnModel2
## k-Nearest Neighbors 
## 
## 132 samples
##  57 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 132, 132, 132, 132, 132, 132, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  1.549684  0.3261105  1.223911
##    7  1.506840  0.3461634  1.199741
##    9  1.483347  0.3626527  1.191919
##   11  1.468087  0.3787644  1.182889
##   13  1.457101  0.3929912  1.171540
##   15  1.465710  0.3889257  1.188223
##   17  1.471753  0.3864912  1.188771
##   19  1.470451  0.3930200  1.187516
##   21  1.469651  0.4002355  1.189690
##   23  1.474788  0.4047451  1.192799
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 13.

KNN discussion

The final value used for the model was k = 13. RMSE=1.457101 .

# Predict yield for the held-out samples with the tuned KNN model.
knnPred2 <- predict(knnModel2, newdata = test_X2)

# Resampling profile across the candidate values of k.
plot(knnModel2)

##  The function 'postResample' can be used to get the test set performance values

postResample(pred = knnPred2, obs = test_Y2)
##      RMSE  Rsquared       MAE 
## 1.4451812 0.4074479 1.2009441

KNN test

RMSE=1.4451812

MARS on the chemical manufacturing

# MARS: tune the interaction degree (1-2) and the number of retained
# terms (2-15) over an explicit grid.
# The former `tuneLength = 10` argument was dropped: train() only uses
# tuneLength to build a grid when `tuneGrid` is not supplied, so it had no
# effect here and wrongly suggested a different search.
set.seed(200)
Mgrid2 <- expand.grid(.degree = 1:2, .nprune = 2:15)
MARSmodel2 <- train(
  x = train_X2,
  y = train_Y2,
  method = "earth",
  tuneGrid = Mgrid2,
  preProcess = c("center", "scale")
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07

## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
# Print the resampling results for every (degree, nprune) combination.
MARSmodel2
## Multivariate Adaptive Regression Spline 
## 
## 132 samples
##  57 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 132, 132, 132, 132, 132, 132, ... 
## Resampling results across tuning parameters:
## 
##   degree  nprune  RMSE      Rsquared   MAE      
##   1        2      1.490279  0.3755794  1.1718045
##   1        3      1.263663  0.5439061  1.0038594
##   1        4      1.240091  0.5595437  0.9794399
##   1        5      1.284713  0.5331047  1.0046833
##   1        6      1.312602  0.5267242  1.0363784
##   1        7      1.335864  0.5114358  1.0500932
##   1        8      1.388257  0.4859620  1.0834030
##   1        9      1.551251  0.4629298  1.1194819
##   1       10      1.596157  0.4532295  1.1346676
##   1       11      1.771924  0.4250801  1.1845418
##   1       12      1.833954  0.4185417  1.2066192
##   1       13      2.663280  0.3818380  1.3504608
##   1       14      2.668555  0.3793821  1.3613282
##   1       15      2.633944  0.3749723  1.3605887
##   2        2      1.491632  0.3730524  1.1716428
##   2        3      1.379036  0.4764277  1.0871948
##   2        4      1.445515  0.4488930  1.0973929
##   2        5      1.475670  0.4282088  1.1029835
##   2        6      1.524896  0.4137309  1.1229552
##   2        7      1.617197  0.3863619  1.1768719
##   2        8      1.669632  0.3740409  1.2006241
##   2        9      1.817293  0.3430625  1.2538592
##   2       10      1.872404  0.3309127  1.2766724
##   2       11      1.930613  0.3282800  1.3292641
##   2       12      2.039573  0.3038107  1.3849343
##   2       13      2.005695  0.3214613  1.3949230
##   2       14      2.064212  0.3151779  1.4109945
##   2       15      2.165026  0.3176985  1.4397028
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 4 and degree = 1.
# Resampling profile of the MARS model across the tuning grid.
plot(MARSmodel2)

DISCUSSION:

Training - The final values used for the model were nprune = 4 and degree = 1. RMSE = 1.240091

MARS selects the informative predictors itself; list them using varImp()…

# Variable importance for the tuned MARS model.
varImp(MARSmodel2)
## earth variable importance
## 
##                        Overall
## ManufacturingProcess32  100.00
## ManufacturingProcess09   46.54
## ManufacturingProcess13    0.00

using the test data..

# Test-set performance of the tuned MARS model.
marspred2 <- predict(MARSmodel2, newdata = test_X2)
postResample(pred = marspred2, obs = test_Y2)
##      RMSE  Rsquared       MAE 
## 1.1285523 0.6370613 0.8805678

The test RMSE for MARS model = 1.1285523

SVM

# Radial-basis SVM: tune the cost parameter over 10 values with
# cross-validation, centering and scaling the predictors inside train().
# NOTE(review): this fit is resampled with CV, whereas the KNN and MARS
# fits in this section use train()'s default resampling, so the resampled
# RMSEs are not estimated in the same way.
set.seed(200)
SVMmodel2 <- train(
  x = train_X2,
  y = train_Y2,
  method = "svmRadial",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = trainControl(method = "cv")
)
SVMmodel2
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 132 samples
##  57 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 117, 119, 119, 119, 120, 119, ... 
## Resampling results across tuning parameters:
## 
##   C       RMSE      Rsquared   MAE      
##     0.25  1.421201  0.4526430  1.1627465
##     0.50  1.313353  0.5100836  1.0770470
##     1.00  1.238889  0.5483865  1.0060123
##     2.00  1.188412  0.5689254  0.9634596
##     4.00  1.181378  0.5668063  0.9639502
##     8.00  1.173178  0.5690772  0.9606567
##    16.00  1.171902  0.5695347  0.9589138
##    32.00  1.171902  0.5695347  0.9589138
##    64.00  1.171902  0.5695347  0.9589138
##   128.00  1.171902  0.5695347  0.9589138
## 
## Tuning parameter 'sigma' was held constant at a value of 0.01390285
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.01390285 and C = 16.
# Plot the tuning profile of the SVM fitted in this section (SVMmodel2);
# the original call referenced `SVMmodel`, which is not the model fitted
# and discussed here.
plot(SVMmodel2)

## DISCUSSION:

SVM - The final values used for the training model were sigma = 0.01390285 and C = 16. RMSE = 1.171902 .

So run SVM test data…….

# Test-set performance of the tuned SVM model.
SVMpred2 <- predict(object = SVMmodel2, newdata = test_X2)
postResample(
  pred = SVMpred2,
  obs = test_Y2
)
##      RMSE  Rsquared       MAE 
## 1.1988287 0.5997168 0.9173792

SVM test data for chemical manufacturing

RMSE = 1.198829

NNET

Let’s model nnet on the dataset…..

# Single-hidden-layer network: 5 hidden units, weight decay 0.01, a linear
# output unit for regression, and at most 500 iterations.
# MaxNWts is set to the exact weight count of this architecture:
# 5 * (p + 1) hidden-layer weights plus 5 + 1 output-layer weights.
set.seed(200)
nnetmodel2 <- nnet(
  x = train_X2,
  y = train_Y2,
  size = 5,
  decay = 0.01,
  linout = TRUE,
  trace = FALSE,
  maxit = 500,
  MaxNWts = 5 * (ncol(train_X2) + 1) + 5 + 1
)
nnetmodel2
## a 57-5-1 network with 296 weights
## options were - linear output units  decay=0.01
# List every fitted weight of the network (bias, input->hidden, hidden->output).
summary(nnetmodel2)
## a 57-5-1 network with 296 weights
## options were - linear output units  decay=0.01
##   b->h1  i1->h1  i2->h1  i3->h1  i4->h1  i5->h1  i6->h1  i7->h1  i8->h1  i9->h1 
##    0.89    0.47    0.78    0.80   -0.23    0.28    0.72    0.59    0.82   -0.31 
## i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1 i16->h1 i17->h1 i18->h1 i19->h1 
##   -0.42   -1.24    0.00   -0.53   -0.71   -0.36    0.47   -0.37   -0.22   -0.34 
## i20->h1 i21->h1 i22->h1 i23->h1 i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 
##    0.35    0.12   -0.15   -0.27   -0.66    0.41    0.10   -0.34   -0.03    0.10 
## i30->h1 i31->h1 i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1 
##   -0.04   -0.09   -0.02   -0.17   -0.59   -0.05   -0.42    0.03   -0.41   -0.13 
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1 i48->h1 i49->h1 
##    0.04   -0.01    0.14    0.46    0.95   -0.95   -0.11    0.08   -0.47   -0.40 
## i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1 i56->h1 i57->h1 
##    0.00   -0.49    0.94   -0.21    0.11    0.49    0.28    0.02 
##   b->h2  i1->h2  i2->h2  i3->h2  i4->h2  i5->h2  i6->h2  i7->h2  i8->h2  i9->h2 
##    1.42   -0.45   -0.34    0.03   -0.03    0.70    0.19    0.05   -0.47   -0.30 
## i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2 i16->h2 i17->h2 i18->h2 i19->h2 
##    0.03   -0.35    0.57   -0.62   -1.04    0.29   -0.22    0.02    0.42    0.08 
## i20->h2 i21->h2 i22->h2 i23->h2 i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 
##   -0.12    0.09    0.44   -0.27   -0.02   -0.31    0.03   -0.21   -0.58   -0.42 
## i30->h2 i31->h2 i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2 
##    0.72   -0.55   -0.17    0.18   -0.59    0.17    0.20    0.04   -0.42    0.10 
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2 i48->h2 i49->h2 
##   -0.37    0.23    0.11   -0.06    0.25   -0.10   -0.21   -0.14   -0.02   -0.23 
## i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2 i56->h2 i57->h2 
##   -0.50   -0.05   -0.86    0.44    0.18    0.48   -0.32    0.72 
##   b->h3  i1->h3  i2->h3  i3->h3  i4->h3  i5->h3  i6->h3  i7->h3  i8->h3  i9->h3 
##    1.45    0.97   -0.28   -0.48   -0.32   -0.07   -0.09    0.10    0.59   -0.11 
## i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3 i16->h3 i17->h3 i18->h3 i19->h3 
##    0.12   -0.09   -0.36    0.41    0.08   -0.34    0.37   -0.14   -0.28    0.19 
## i20->h3 i21->h3 i22->h3 i23->h3 i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 
##   -0.31   -0.22   -0.32    0.23   -0.03   -0.60    0.47   -0.35    0.03   -0.57 
## i30->h3 i31->h3 i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3 
##    0.23    0.04    0.53   -0.26   -0.04   -0.38    0.11    0.21   -0.09   -0.35 
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3 i48->h3 i49->h3 
##    0.14    0.70    0.45    0.40    0.00    0.13   -0.21    0.21    0.30    0.18 
## i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3 i56->h3 i57->h3 
##   -0.49   -0.19   -0.12   -0.48    0.28   -0.43    0.65   -0.72 
##   b->h4  i1->h4  i2->h4  i3->h4  i4->h4  i5->h4  i6->h4  i7->h4  i8->h4  i9->h4 
##    1.51   -0.29    0.04    0.32    0.05   -0.43    0.74   -1.09    0.01    0.32 
## i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4 i16->h4 i17->h4 i18->h4 i19->h4 
##    0.28   -0.62   -0.05   -0.28    0.83    0.12   -0.11    0.69    0.09    0.09 
## i20->h4 i21->h4 i22->h4 i23->h4 i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 
##   -0.02   -0.18    0.18   -0.35    0.38   -0.30   -0.06    0.70   -0.31   -0.23 
## i30->h4 i31->h4 i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4 
##    0.67    0.34    0.43   -0.07    0.61    0.20   -0.04   -0.19   -0.06   -0.06 
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4 i48->h4 i49->h4 
##   -0.28   -0.10   -0.11    0.50    0.76   -0.52   -0.06   -0.39    0.32    0.37 
## i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4 i56->h4 i57->h4 
##    0.12    0.67   -0.21    0.06    0.39    0.37   -0.83    0.04 
##   b->h5  i1->h5  i2->h5  i3->h5  i4->h5  i5->h5  i6->h5  i7->h5  i8->h5  i9->h5 
##    1.35    0.13   -0.26   -0.39   -0.36    0.19   -0.38   -0.11   -0.44    0.40 
## i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5 i16->h5 i17->h5 i18->h5 i19->h5 
##    0.24    0.63    0.01    0.21   -0.04   -0.09    0.29   -0.15    0.15    0.10 
## i20->h5 i21->h5 i22->h5 i23->h5 i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 
##    0.27    0.36    0.74   -0.10    0.46    0.53   -0.16    0.56    0.30    0.36 
## i30->h5 i31->h5 i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5 
##    0.92   -0.51   -0.36    0.36    0.65   -0.11   -0.46    0.09    0.01   -0.05 
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5 i48->h5 i49->h5 
##   -0.29   -0.37   -0.45    0.27   -0.62   -0.22    0.18    0.82   -0.81   -0.06 
## i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5 i56->h5 i57->h5 
##    0.65    0.32   -0.19    0.75    0.36   -0.23   -0.40   -0.29 
##  b->o h1->o h2->o h3->o h4->o h5->o 
## 11.06  8.18  8.11  8.29 10.04  7.22

DISCUSSION:

Results:

a 57-5-1 network with 296 weights options were - linear output units decay=0.01

# Test-set performance of the neural network.
# The observed values must be the held-out yields (test_Y2); the original
# call passed the predictor data frame (test_X2) as `obs`, which produced a
# meaningless RMSE and an NA R-squared.
nnetpred <- predict(nnetmodel2, newdata = test_X2)
postResample(pred = nnetpred, obs = test_Y2)
##     RMSE Rsquared      MAE 
## 39.34299       NA 39.13668

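Neural network model RMSE=39.34 (note: this value was computed against the predictors `test_X2` rather than the observed yields `test_Y2`, so it is not a valid test RMSE and is not comparable to the other models)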

DISCUSSION:

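The SVM model has the best resampled (training) RMSE = 1.1719, with a test RMSE = 1.1988. However, MARS is a close contender: its resampled RMSE is 1.2401 and its test RMSE (1.1286) is actually lower than that of the SVM.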

(b) Which predictors are most important in the optimal nonlinear regression model? Do either the biological or process variables dominate the list? How do the top ten predictors compare to the top ten predictors from the optimal linear model?

# Variable importance for the SVM model; caret reports a loess r-squared
# based importance for this model, scaled so the top predictor is 100.
varImp(SVMmodel2)
## loess r-squared variable importance
## 
##   only 20 most important variables shown (out of 57)
## 
##                        Overall
## ManufacturingProcess32  100.00
## ManufacturingProcess17   80.31
## BiologicalMaterial06     75.09
## ManufacturingProcess13   74.47
## BiologicalMaterial03     67.11
## ManufacturingProcess06   66.44
## ManufacturingProcess36   66.30
## BiologicalMaterial12     65.94
## ManufacturingProcess09   65.29
## BiologicalMaterial02     55.94
## ManufacturingProcess31   53.57
## ManufacturingProcess29   47.17
## ManufacturingProcess33   45.79
## BiologicalMaterial08     44.98
## ManufacturingProcess11   41.72
## ManufacturingProcess02   41.58
## BiologicalMaterial11     41.18
## BiologicalMaterial04     40.31
## BiologicalMaterial09     35.12
## BiologicalMaterial01     34.15

DISCUSSION:

Similar to the linear model, 11 of the 20 predictors are manufacturing.

The top twenty from the linear elastic net model were (elastic net variable importance, Overall):

- ManufacturingProcess13: 100.00
- ManufacturingProcess32: 93.67
- BiologicalMaterial03: 92.86
- BiologicalMaterial06: 86.68
- ManufacturingProcess17: 80.34
- BiologicalMaterial12: 76.76
- ManufacturingProcess09: 76.15
- ManufacturingProcess36: 75.95
- ManufacturingProcess06: 63.29
- BiologicalMaterial02: 59.13
- ManufacturingProcess11: 54.38
- BiologicalMaterial11: 53.65
- ManufacturingProcess31: 51.81
- BiologicalMaterial04: 48.83
- BiologicalMaterial09: 47.43
- ManufacturingProcess18: 43.88
- ManufacturingProcess30: 42.31
- BiologicalMaterial08: 40.33
- BiologicalMaterial01: 40.08
- ManufacturingProcess33: 38.33

DISCUSSION:

The top 5 important predictors from the linear (elastic net) and nonlinear (SVM) models match exactly, only with an ordering difference. The predictors ranked 6-10 in the linear and nonlinear models also match exactly, again with an ordering difference.

The top ten are exactly the same in both models.

Furthermore, 18 of the 20 variables matched exactly.

(c) Explore the relationships between the top predictors and the response for the predictors that are unique to the optimal nonlinear regression model. Do these reveal intuition about the biological or process predictors and their relationship with yield?

Let’s look at the top ten variables of importance…

# Append the response to the processed predictors. The new column ends up
# named `ChemicalManufacturingProcess$Yield`, which is how the chunks that
# follow refer to it.
use1 <- pred3  %>% cbind(ChemicalManufacturingProcess$Yield)

# Manufacturing Process 32 (ranked 1st): correlation with yield and scatter plot.
cor(use1$ManufacturingProcess32, use1$`ChemicalManufacturingProcess$Yield`)
## [1] 0.6083321
# Bare column names inside aes() (not use1$...), as ggplot2 recommends.
ggplot(
  use1,
  aes(x = ManufacturingProcess32, y = `ChemicalManufacturingProcess$Yield`)
) +
  geom_point() +
  labs(y = "Yield")
## Warning: Use of `use1$ManufacturingProcess32` is discouraged. Use
## `ManufacturingProcess32` instead.
## Warning: Use of `use1$`ChemicalManufacturingProcess$Yield`` is discouraged. Use
## `ChemicalManufacturingProcess$Yield` instead.

# Manufacturing Process 17 (ranked 2nd): correlation with yield and scatter plot.
cor(use1$ManufacturingProcess17, use1$`ChemicalManufacturingProcess$Yield`)
## [1] -0.4258069
# Bare column names inside aes() (not use1$...), as ggplot2 recommends.
ggplot(
  use1,
  aes(x = ManufacturingProcess17, y = `ChemicalManufacturingProcess$Yield`)
) +
  geom_point() +
  labs(y = "Yield")
## Warning: Use of `use1$ManufacturingProcess17` is discouraged. Use
## `ManufacturingProcess17` instead.

## Warning: Use of `use1$`ChemicalManufacturingProcess$Yield`` is discouraged. Use
## `ChemicalManufacturingProcess$Yield` instead.

# Biological Material 06 (ranked 3rd): correlation with yield and scatter plot.
cor(use1$BiologicalMaterial06, use1$`ChemicalManufacturingProcess$Yield`)
## [1] 0.4781634
# Bare column names inside aes() (not use1$...), as ggplot2 recommends.
ggplot(
  use1,
  aes(x = BiologicalMaterial06, y = `ChemicalManufacturingProcess$Yield`)
) +
  geom_point() +
  labs(y = "Yield")
## Warning: Use of `use1$BiologicalMaterial06` is discouraged. Use
## `BiologicalMaterial06` instead.

## Warning: Use of `use1$`ChemicalManufacturingProcess$Yield`` is discouraged. Use
## `ChemicalManufacturingProcess$Yield` instead.

# Manufacturing Process 13 (ranked 4th): correlation with yield and scatter plot.
cor(use1$ManufacturingProcess13, use1$`ChemicalManufacturingProcess$Yield`)
## [1] -0.5036797
# Bare column names inside aes() (not use1$...), as ggplot2 recommends.
ggplot(
  use1,
  aes(x = ManufacturingProcess13, y = `ChemicalManufacturingProcess$Yield`)
) +
  geom_point() +
  labs(y = "Yield")
## Warning: Use of `use1$ManufacturingProcess13` is discouraged. Use
## `ManufacturingProcess13` instead.

## Warning: Use of `use1$`ChemicalManufacturingProcess$Yield`` is discouraged. Use
## `ChemicalManufacturingProcess$Yield` instead.

# Biological Material 03 (ranked 5th): correlation with yield and scatter plot.
cor(use1$BiologicalMaterial03, use1$`ChemicalManufacturingProcess$Yield`)
## [1] 0.445086
# Bare column names inside aes() (not use1$...), as ggplot2 recommends.
ggplot(
  use1,
  aes(x = BiologicalMaterial03, y = `ChemicalManufacturingProcess$Yield`)
) +
  geom_point() +
  labs(y = "Yield")
## Warning: Use of `use1$BiologicalMaterial03` is discouraged. Use
## `BiologicalMaterial03` instead.

## Warning: Use of `use1$`ChemicalManufacturingProcess$Yield`` is discouraged. Use
## `ChemicalManufacturingProcess$Yield` instead.

# Manufacturing Process 06 (ranked 6th): correlation with yield and scatter plot.
cor(use1$ManufacturingProcess06, use1$`ChemicalManufacturingProcess$Yield`)
## [1] 0.3918329
# Bare column names inside aes() (not use1$...), as ggplot2 recommends.
ggplot(
  use1,
  aes(x = ManufacturingProcess06, y = `ChemicalManufacturingProcess$Yield`)
) +
  geom_point() +
  labs(y = "Yield")
## Warning: Use of `use1$ManufacturingProcess06` is discouraged. Use
## `ManufacturingProcess06` instead.

## Warning: Use of `use1$`ChemicalManufacturingProcess$Yield`` is discouraged. Use
## `ChemicalManufacturingProcess$Yield` instead.

# Manufacturing Process 36 (ranked 7th): correlation with yield and scatter plot.
cor(use1$ManufacturingProcess36, use1$`ChemicalManufacturingProcess$Yield`)
## [1] -0.5237389
# Bare column names inside aes() (not use1$...), as ggplot2 recommends.
ggplot(
  use1,
  aes(x = ManufacturingProcess36, y = `ChemicalManufacturingProcess$Yield`)
) +
  geom_point() +
  labs(y = "Yield")
## Warning: Use of `use1$ManufacturingProcess36` is discouraged. Use
## `ManufacturingProcess36` instead.

## Warning: Use of `use1$`ChemicalManufacturingProcess$Yield`` is discouraged. Use
## `ChemicalManufacturingProcess$Yield` instead.

# Biological Material 12 (ranked 8th): correlation with yield and scatter plot.
cor(use1$BiologicalMaterial12, use1$`ChemicalManufacturingProcess$Yield`)
## [1] 0.3674976
# Bare column names inside aes() (not use1$...), as ggplot2 recommends.
ggplot(
  use1,
  aes(x = BiologicalMaterial12, y = `ChemicalManufacturingProcess$Yield`)
) +
  geom_point() +
  labs(y = "Yield")
## Warning: Use of `use1$BiologicalMaterial12` is discouraged. Use
## `BiologicalMaterial12` instead.

## Warning: Use of `use1$`ChemicalManufacturingProcess$Yield`` is discouraged. Use
## `ChemicalManufacturingProcess$Yield` instead.

# Manufacturing Process 09 (ranked 9th): correlation with yield and scatter plot.
cor(use1$ManufacturingProcess09, use1$`ChemicalManufacturingProcess$Yield`)
## [1] 0.5034705
# Bare column names inside aes() (not use1$...), as ggplot2 recommends.
ggplot(
  use1,
  aes(x = ManufacturingProcess09, y = `ChemicalManufacturingProcess$Yield`)
) +
  geom_point() +
  labs(y = "Yield")
## Warning: Use of `use1$ManufacturingProcess09` is discouraged. Use
## `ManufacturingProcess09` instead.

## Warning: Use of `use1$`ChemicalManufacturingProcess$Yield`` is discouraged. Use
## `ChemicalManufacturingProcess$Yield` instead.

# Biological Material 02 (ranked 10th): correlation with yield and scatter plot.
cor(use1$BiologicalMaterial02, use1$`ChemicalManufacturingProcess$Yield`)
## [1] 0.4815158
# Bare column names inside aes() (not use1$...), as ggplot2 recommends.
ggplot(
  use1,
  aes(x = BiologicalMaterial02, y = `ChemicalManufacturingProcess$Yield`)
) +
  geom_point() +
  labs(y = "Yield")
## Warning: Use of `use1$BiologicalMaterial02` is discouraged. Use
## `BiologicalMaterial02` instead.

## Warning: Use of `use1$`ChemicalManufacturingProcess$Yield`` is discouraged. Use
## `ChemicalManufacturingProcess$Yield` instead.

## DISCUSSION:

The correlation does not adjust for other variables. This information is helpful for knowing how one variable is correlated with another when no other variables are in the mix.

This could help in future decision making.

For all 10 important variables, the absolute correlation with yield lies between roughly 0.37 and 0.61, depicting moderate correlation.

3 of the 10 variables have negative correlation: M17, M13, and M36. However, the top variable is Manufacturing 32, with the highest (positive) correlation of .6. This should be further understood, because perhaps a manufacturing process could be tweaked more easily than a biological process.