7.2. Friedman (1991) introduced several benchmark data sets created by simulation. One of these simulations used the following nonlinear equation to create data:

$$y = 10\sin(\pi x_1 x_2) + 20(x_3 - 0.5)^2 + 10x_4 + 5x_5 + N(0, \sigma^2)$$

where the x values are random variables uniformly distributed on [0, 1] (the simulation also creates five additional non-informative predictors). The mlbench package contains a function called mlbench.friedman1 that simulates these data:

 library(mlbench)
 library(caret)  # loaded here so featurePlot() below is available
 set.seed(200)
 ## mlbench.friedman1() creates a list with a vector 'y' and a matrix
 ## of predictors 'x'.
 trainingData <- mlbench.friedman1(200, sd = 1)
 ## We convert the 'x' data from a matrix to a data frame.
 ## One reason is that this will give the columns names.
 trainingData$x <- data.frame(trainingData$x)
 ## Look at the data using featurePlot() (or other methods).
 featurePlot(trainingData$x, trainingData$y)
 ## Also simulate a large test set to estimate the true error rate
 ## with good precision:
 testData <- mlbench.friedman1(5000, sd = 1)
 testData$x <- data.frame(testData$x)
 trx <- trainingData$x
 try <- trainingData$y
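To make the simulation concrete, the Friedman equation above can also be coded by hand; the sketch below is illustrative only (simFriedman1 is a made-up helper name, and mlbench.friedman1 remains the function actually used for the analysis):

 ## A hand-rolled version of the Friedman (1991) simulation, for intuition.
 simFriedman1 <- function(n, sd = 1) {
   x <- matrix(runif(n * 10), ncol = 10)   # ten uniform predictors on [0, 1]
   y <- 10 * sin(pi * x[, 1] * x[, 2]) +   # only x1-x5 enter the response
        20 * (x[, 3] - 0.5)^2 +
        10 * x[, 4] +
         5 * x[, 5] +
        rnorm(n, sd = sd)                  # the N(0, sigma^2) noise term
   list(x = data.frame(x), y = y)
 }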
 
 ## KNN Model
 knnModel <- train(x = trx,
                   y = try,
                   method = "knn",
                   preProcess = c("center", "scale"),
                   tuneLength = 10)



 knnPred <- predict(knnModel, newdata = testData$x)

 
 ## SVM Model
 svm <- train(x = trx,
              y = try,
              method = "svmRadial",
              preProc = c("center", "scale"),
              tuneLength = 10)
 
svmPred <- predict(svm, newdata = testData$x)
## MARS Model
 
grid <- expand.grid(.degree = 1:2, .nprune = 2:38)
mars <- train(x = trx,
              y = try,
              method = "earth",
              tuneGrid = grid,   # tuneGrid supersedes tuneLength
              preProcess = c("center", "scale"))
marsPred <- predict(mars, newdata = testData$x)

## Neural Network
nn <- train(x = trx,
            y = try,
            method = "avNNet",
            preProcess = c("center", "scale"),
            tuneLength = 10,
            linout = TRUE, trace = FALSE,
            maxit = 10)   # a deliberately low iteration cap, for speed
nnPred <- predict(nn, newdata = testData$x)

knnModel
## k-Nearest Neighbors 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  3.466085  0.5121775  2.816838
##    7  3.349428  0.5452823  2.727410
##    9  3.264276  0.5785990  2.660026
##   11  3.214216  0.6024244  2.603767
##   13  3.196510  0.6176570  2.591935
##   15  3.184173  0.6305506  2.577482
##   17  3.183130  0.6425367  2.567787
##   19  3.198752  0.6483184  2.592683
##   21  3.188993  0.6611428  2.588787
##   23  3.200458  0.6638353  2.604529
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 17.
 postResample(pred = knnPred, obs = testData$y)
##      RMSE  Rsquared       MAE 
## 3.2040595 0.6819919 2.5683461
 svm
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ... 
## Resampling results across tuning parameters:
## 
##   C       RMSE      Rsquared   MAE     
##     0.25  2.545335  0.7804647  2.015121
##     0.50  2.319786  0.7965148  1.830009
##     1.00  2.188357  0.8119624  1.726031
##     2.00  2.103655  0.8241314  1.655842
##     4.00  2.066890  0.8294297  1.631062
##     8.00  2.052688  0.8313917  1.623563
##    16.00  2.049883  0.8318288  1.621842
##    32.00  2.049883  0.8318288  1.621842
##    64.00  2.049883  0.8318288  1.621842
##   128.00  2.049883  0.8318288  1.621842
## 
## Tuning parameter 'sigma' was held constant at a value of 0.06802164
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.06802164 and C = 16.
 postResample(pred = svmPred, obs = testData$y)
##      RMSE  Rsquared       MAE 
## 2.0864652 0.8236735 1.5854649
 mars
## Multivariate Adaptive Regression Spline 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ... 
## Resampling results across tuning parameters:
## 
##   degree  nprune  RMSE      Rsquared   MAE     
##   1        2      4.447386  0.2254125  3.620675
##   1        3      3.790305  0.4344625  3.058704
##   1        4      2.801182  0.6884819  2.233531
##   1        5      2.551283  0.7412626  2.051644
##   1        6      2.493135  0.7492201  1.986528
##   1        7      2.089713  0.8239588  1.645996
##   1        8      1.889475  0.8565881  1.484798
##   1        9      1.816053  0.8673608  1.420333
##   1       10      1.819611  0.8674028  1.417343
##   1       11      1.819783  0.8670556  1.415058
##   1       12      1.832487  0.8651613  1.426371
##   1       13      1.845943  0.8632112  1.436005
##   1       14      1.855353  0.8613778  1.452115
##   1       15      1.854557  0.8617322  1.452920
##   1       16      1.856173  0.8616879  1.455393
##   1       17      1.856989  0.8615480  1.456862
##   1       18      1.856989  0.8615480  1.456862
##   1       19      1.856989  0.8615480  1.456862
##   1       20      1.856989  0.8615480  1.456862
##   1       21      1.856989  0.8615480  1.456862
##   1       22      1.856989  0.8615480  1.456862
##   1       23      1.856989  0.8615480  1.456862
##   1       24      1.856989  0.8615480  1.456862
##   1       25      1.856989  0.8615480  1.456862
##   1       26      1.856989  0.8615480  1.456862
##   1       27      1.856989  0.8615480  1.456862
##   1       28      1.856989  0.8615480  1.456862
##   1       29      1.856989  0.8615480  1.456862
##   1       30      1.856989  0.8615480  1.456862
##   1       31      1.856989  0.8615480  1.456862
##   1       32      1.856989  0.8615480  1.456862
##   1       33      1.856989  0.8615480  1.456862
##   1       34      1.856989  0.8615480  1.456862
##   1       35      1.856989  0.8615480  1.456862
##   1       36      1.856989  0.8615480  1.456862
##   1       37      1.856989  0.8615480  1.456862
##   1       38      1.856989  0.8615480  1.456862
##   2        2      4.434592  0.2241213  3.616685
##   2        3      3.799538  0.4319047  3.064845
##   2        4      2.806374  0.6871266  2.237911
##   2        5      2.524002  0.7462965  2.023657
##   2        6      2.446243  0.7602514  1.931404
##   2        7      2.147529  0.8127597  1.682839
##   2        8      1.977186  0.8393569  1.557609
##   2        9      1.831267  0.8635192  1.428370
##   2       10      1.639428  0.8902850  1.280510
##   2       11      1.545708  0.9019039  1.213559
##   2       12      1.499558  0.9081641  1.171249
##   2       13      1.494111  0.9087340  1.161702
##   2       14      1.492700  0.9102980  1.160345
##   2       15      1.484444  0.9116520  1.153052
##   2       16      1.487065  0.9109633  1.151057
##   2       17      1.496021  0.9098876  1.156630
##   2       18      1.487296  0.9111035  1.150491
##   2       19      1.486280  0.9113126  1.149198
##   2       20      1.486280  0.9113126  1.149198
##   2       21      1.486280  0.9113126  1.149198
##   2       22      1.486280  0.9113126  1.149198
##   2       23      1.486280  0.9113126  1.149198
##   2       24      1.486280  0.9113126  1.149198
##   2       25      1.486280  0.9113126  1.149198
##   2       26      1.486280  0.9113126  1.149198
##   2       27      1.486280  0.9113126  1.149198
##   2       28      1.486280  0.9113126  1.149198
##   2       29      1.486280  0.9113126  1.149198
##   2       30      1.486280  0.9113126  1.149198
##   2       31      1.486280  0.9113126  1.149198
##   2       32      1.486280  0.9113126  1.149198
##   2       33      1.486280  0.9113126  1.149198
##   2       34      1.486280  0.9113126  1.149198
##   2       35      1.486280  0.9113126  1.149198
##   2       36      1.486280  0.9113126  1.149198
##   2       37      1.486280  0.9113126  1.149198
##   2       38      1.486280  0.9113126  1.149198
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 15 and degree = 2.
 postResample(pred = marsPred, obs = testData$y)
##      RMSE  Rsquared       MAE 
## 1.1908806 0.9428866 0.9496858
nn
## Model Averaged Neural Network 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ... 
## Resampling results across tuning parameters:
## 
##   size  decay         RMSE      Rsquared   MAE     
##    1    0.0000000000  3.128820  0.6320687  2.498811
##    1    0.0001000000  3.107544  0.6370921  2.495407
##    1    0.0002371374  3.163252  0.6149715  2.524956
##    1    0.0005623413  3.123078  0.6287146  2.494660
##    1    0.0013335214  3.141668  0.6375256  2.510705
##    1    0.0031622777  3.130592  0.6368533  2.487958
##    1    0.0074989421  3.083642  0.6431109  2.460352
##    1    0.0177827941  3.109579  0.6300262  2.484535
##    1    0.0421696503  3.162708  0.6289212  2.519191
##    1    0.1000000000  3.094492  0.6421950  2.473164
##    3    0.0000000000  2.648455  0.7251107  2.133916
##    3    0.0001000000  2.679208  0.7206391  2.165041
##    3    0.0002371374  2.706788  0.7147311  2.178912
##    3    0.0005623413  2.762171  0.7013884  2.238492
##    3    0.0013335214  2.660382  0.7234814  2.153105
##    3    0.0031622777  2.681812  0.7207370  2.157336
##    3    0.0074989421  2.674776  0.7157596  2.156645
##    3    0.0177827941  2.729657  0.7104905  2.199836
##    3    0.0421696503  2.631564  0.7283282  2.103279
##    3    0.1000000000  2.696572  0.7136820  2.177114
##    5    0.0000000000  2.922733  0.6759939  2.327976
##    5    0.0001000000  2.853978  0.6908079  2.308763
##    5    0.0002371374  2.895676  0.6765080  2.342618
##    5    0.0005623413  2.880794  0.6782593  2.308731
##    5    0.0013335214  2.934064  0.6713027  2.357009
##    5    0.0031622777  2.960609  0.6663393  2.378958
##    5    0.0074989421  2.867584  0.6795627  2.298606
##    5    0.0177827941  2.961187  0.6543978  2.384218
##    5    0.0421696503  2.920749  0.6680839  2.335376
##    5    0.1000000000  2.890832  0.6838303  2.341878
##    7    0.0000000000  3.015235  0.6559685  2.402779
##    7    0.0001000000  2.910304  0.6765894  2.346044
##    7    0.0002371374  2.842701  0.6945078  2.286811
##    7    0.0005623413  2.994514  0.6571072  2.388802
##    7    0.0013335214  2.796730  0.6934224  2.251695
##    7    0.0031622777  3.033221  0.6585045  2.431778
##    7    0.0074989421  2.899710  0.6864625  2.335690
##    7    0.0177827941  2.983988  0.6701486  2.393009
##    7    0.0421696503  2.850925  0.6884874  2.292561
##    7    0.1000000000  2.967903  0.6583638  2.378423
##    9    0.0000000000  2.614224  0.7365018  2.048382
##    9    0.0001000000  2.608856  0.7332699  2.063064
##    9    0.0002371374  2.568944  0.7402739  2.019396
##    9    0.0005623413  2.614273  0.7320823  2.076642
##    9    0.0013335214  2.628447  0.7284600  2.047091
##    9    0.0031622777  2.605874  0.7378485  2.062779
##    9    0.0074989421  2.618953  0.7319339  2.089107
##    9    0.0177827941  2.570207  0.7416047  2.030899
##    9    0.0421696503  2.568533  0.7423609  2.035018
##    9    0.1000000000  2.576620  0.7391354  2.033585
##   11    0.0000000000  2.511344  0.7530407  1.961509
##   11    0.0001000000  2.497001  0.7553022  1.966422
##   11    0.0002371374  2.500386  0.7536464  1.947595
##   11    0.0005623413  2.527625  0.7492405  1.978398
##   11    0.0013335214  2.527984  0.7495224  1.979065
##   11    0.0031622777  2.520052  0.7485472  1.969125
##   11    0.0074989421  2.518164  0.7512021  1.961448
##   11    0.0177827941  2.525764  0.7488841  1.968072
##   11    0.0421696503  2.501572  0.7545813  1.951691
##   11    0.1000000000  2.504610  0.7545259  1.951779
##   13    0.0000000000  2.503232  0.7537019  1.961522
##   13    0.0001000000  2.497599  0.7537725  1.959738
##   13    0.0002371374  2.486734  0.7574174  1.953242
##   13    0.0005623413  2.493780  0.7575011  1.944466
##   13    0.0013335214  2.485324  0.7583431  1.949699
##   13    0.0031622777  2.500245  0.7547206  1.958415
##   13    0.0074989421  2.492082  0.7547764  1.952203
##   13    0.0177827941  2.492416  0.7551457  1.957591
##   13    0.0421696503  2.464628  0.7592132  1.928874
##   13    0.1000000000  2.455520  0.7625533  1.910475
##   15    0.0000000000  2.483624  0.7589583  1.951348
##   15    0.0001000000  2.518595  0.7524384  1.973049
##   15    0.0002371374  2.492682  0.7566005  1.959836
##   15    0.0005623413  2.526795  0.7501101  1.986905
##   15    0.0013335214  2.525187  0.7513776  1.986068
##   15    0.0031622777  2.484207  0.7583813  1.954868
##   15    0.0074989421  2.528672  0.7493950  2.001492
##   15    0.0177827941  2.482741  0.7548608  1.961780
##   15    0.0421696503  2.489621  0.7578285  1.961323
##   15    0.1000000000  2.481894  0.7581087  1.949783
##   17    0.0000000000  2.509274  0.7545777  1.965533
##   17    0.0001000000  2.509781  0.7550460  1.970905
##   17    0.0002371374  2.521266  0.7533134  1.975144
##   17    0.0005623413  2.516596  0.7522336  1.975194
##   17    0.0013335214  2.496458  0.7564561  1.957884
##   17    0.0031622777  2.480115  0.7589945  1.953981
##   17    0.0074989421  2.494060  0.7575602  1.950211
##   17    0.0177827941  2.522617  0.7516227  1.980314
##   17    0.0421696503  2.489164  0.7586780  1.951236
##   17    0.1000000000  2.507616  0.7539608  1.965839
##   19    0.0000000000  2.508316  0.7530887  1.969766
##   19    0.0001000000  2.516852  0.7516058  1.972130
##   19    0.0002371374  2.512634  0.7532131  1.986644
##   19    0.0005623413  2.502437  0.7563636  1.970859
##   19    0.0013335214  2.519458  0.7534491  1.977272
##   19    0.0031622777  2.504889  0.7570108  1.974312
##   19    0.0074989421  2.502389  0.7549638  1.967457
##   19    0.0177827941  2.483511  0.7575397  1.962037
##   19    0.0421696503  2.518330  0.7533324  1.974423
##   19    0.1000000000  2.507275  0.7543896  1.974774
## 
## Tuning parameter 'bag' was held constant at a value of FALSE
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 13, decay = 0.1 and bag = FALSE.
 postResample(pred = nnPred, obs = testData$y)
##      RMSE  Rsquared       MAE 
## 2.5243195 0.7435131 1.9586665
 varImp(mars)
## earth variable importance
## 
##    Overall
## X1  100.00
## X4   75.31
## X2   48.86
## X5   15.61
## X3    0.00

Which models appear to give the best performance? Does MARS select the informative predictors (those named X1–X5)?

MARS easily gave the best performance, with a test-set Rsquared of 0.94 and an RMSE of only 1.19. MARS also selected the informative predictors (X1-X5): the varImp output above ranks X1, X4, X2, and X5 highest (X3 shows zero importance in the final pruned model), and none of the five noise variables appear at all.
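For a side-by-side view, the test-set metrics reported above can be gathered into a single table; a minimal sketch using the objects already fitted in this section:

 ## Collect the test-set metrics for all four models into one table.
 results <- rbind(
   KNN  = postResample(pred = knnPred,  obs = testData$y),
   SVM  = postResample(pred = svmPred,  obs = testData$y),
   MARS = postResample(pred = marsPred, obs = testData$y),
   NNet = postResample(pred = nnPred,   obs = testData$y))
 results[order(results[, "RMSE"]), ]   # lowest RMSE (best) first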

7.5. Exercise 6.3 describes data for a chemical manufacturing process. Use the same data imputation, data splitting, and pre-processing steps as before and train several nonlinear regression models.

library(AppliedPredictiveModeling)
data(ChemicalManufacturingProcess)
## Drop the rows with missing values (see the imputation sketch below
## for the alternative used in Exercise 6.3).
ChemicalManufacturingProcess <- na.omit(ChemicalManufacturingProcess)
set.seed(6354)
partition <- createDataPartition(ChemicalManufacturingProcess[, 1], p = 0.75, list = FALSE)
trx <- ChemicalManufacturingProcess[partition, -1]
try <- ChemicalManufacturingProcess[partition, 1]
tex <- ChemicalManufacturingProcess[-partition, -1]
tey <- ChemicalManufacturingProcess[-partition, 1]
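Exercise 6.3 imputed missing values rather than dropping incomplete rows; na.omit() is used above for simplicity. If imputation is preferred, caret's preProcess() can handle it. A minimal sketch, assuming k-nearest-neighbor imputation (this is not the code run for the results below):

## Alternative to na.omit(): impute the missing predictor values.
## Note that caret's knnImpute also centers and scales the predictors.
data(ChemicalManufacturingProcess)  # reload the full data set
imp <- preProcess(ChemicalManufacturingProcess[, -1], method = "knnImpute")
imputedX <- predict(imp, ChemicalManufacturingProcess[, -1])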


 ## KNN Model
 knnModel <- train(x = trx,
                   y = try,
                   method = "knn",
                   preProcess = c("center", "scale", "nzv"),
                   tuneLength = 10)



 knnPred <- predict(knnModel, newdata = tex)

 
 ## SVM Model
 svm <- train(x = trx,
              y = try,
              method = "svmRadial",
              preProc = c("center", "scale", "nzv"),
              tuneLength = 10)
 
svmPred <- predict(svm, newdata = tex)
## MARS Model
 
grid <- expand.grid(.degree = 1:2, .nprune = 2:38)
mars <- train(x = trx,
              y = try,
              method = "earth",
              tuneGrid = grid,   # tuneGrid supersedes tuneLength
              preProcess = c("center", "scale", "nzv"))

marsPred <- predict(mars, newdata = tex)

## Neural Network
nn <- train(x = trx,
            y = try,
            method = "avNNet",
            tuneGrid = expand.grid(decay = c(0, 0.01, 0.1),
                                   size = c(1, 5, 10), bag = FALSE),
            preProcess = c("center", "scale", "nzv"),
            linout = TRUE, trace = FALSE,
            maxit = 10)   # a deliberately low iteration cap, for speed

nnPred <- predict(nn, newdata = tex)

knnModel
## k-Nearest Neighbors 
## 
## 116 samples
##  57 predictor
## 
## Pre-processing: centered (56), scaled (56), remove (1) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 116, 116, 116, 116, 116, 116, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  1.464010  0.4673338  1.139585
##    7  1.477998  0.4625326  1.175028
##    9  1.490020  0.4636216  1.200355
##   11  1.510578  0.4509582  1.217519
##   13  1.519028  0.4522871  1.230534
##   15  1.533386  0.4506921  1.241647
##   17  1.543198  0.4495521  1.244545
##   19  1.553834  0.4536583  1.248100
##   21  1.566911  0.4481065  1.258900
##   23  1.576190  0.4458291  1.268532
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 5.
 postResample(pred = knnPred, obs = tey)
##     RMSE Rsquared      MAE 
## 1.325821 0.490956 1.113889
 svm
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 116 samples
##  57 predictor
## 
## Pre-processing: centered (56), scaled (56), remove (1) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 116, 116, 116, 116, 116, 116, ... 
## Resampling results across tuning parameters:
## 
##   C       RMSE      Rsquared   MAE      
##     0.25  1.419957  0.4796030  1.1454965
##     0.50  1.306286  0.5285395  1.0528488
##     1.00  1.227004  0.5680746  0.9794792
##     2.00  1.177241  0.5936343  0.9307681
##     4.00  1.162746  0.5979497  0.9109044
##     8.00  1.159309  0.5996155  0.9077492
##    16.00  1.159309  0.5996155  0.9077492
##    32.00  1.159309  0.5996155  0.9077492
##    64.00  1.159309  0.5996155  0.9077492
##   128.00  1.159309  0.5996155  0.9077492
## 
## Tuning parameter 'sigma' was held constant at a value of 0.01140901
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.01140901 and C = 8.
 postResample(pred = svmPred, obs = tey)
##      RMSE  Rsquared       MAE 
## 1.2943112 0.5358659 1.0293632
 mars
## Multivariate Adaptive Regression Spline 
## 
## 116 samples
##  57 predictor
## 
## Pre-processing: centered (56), scaled (56), remove (1) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 116, 116, 116, 116, 116, 116, ... 
## Resampling results across tuning parameters:
## 
##   degree  nprune  RMSE      Rsquared   MAE     
##   1        2      1.707440  0.2496511  1.376532
##   1        3      1.388959  0.4901480  1.094787
##   1        4      1.444912  0.4902384  1.118930
##   1        5      1.498006  0.4694278  1.151446
##   1        6      1.528519  0.4587299  1.171904
##   1        7      1.576584  0.4460382  1.189386
##   1        8      1.617268  0.4277323  1.211580
##   1        9      1.638462  0.4308237  1.223718
##   1       10      1.669968  0.4221263  1.243401
##   1       11      1.675764  0.4182999  1.252501
##   1       12      1.706199  0.4162498  1.261994
##   1       13      1.694483  0.4248081  1.264487
##   1       14      1.715165  0.4212555  1.274582
##   1       15      1.725466  0.4200319  1.270540
##   1       16      1.824161  0.4063203  1.312025
##   1       17      1.820779  0.4085712  1.311544
##   1       18      1.831965  0.4053512  1.316651
##   1       19      1.832663  0.4065130  1.318522
##   1       20      1.827386  0.4078707  1.314769
##   1       21      1.827599  0.4075625  1.315001
##   1       22      1.827599  0.4075625  1.315001
##   1       23      1.827599  0.4075625  1.315001
##   1       24      1.827599  0.4075625  1.315001
##   1       25      1.827599  0.4075625  1.315001
##   1       26      1.827599  0.4075625  1.315001
##   1       27      1.827599  0.4075625  1.315001
##   1       28      1.827599  0.4075625  1.315001
##   1       29      1.827599  0.4075625  1.315001
##   1       30      1.827599  0.4075625  1.315001
##   1       31      1.827599  0.4075625  1.315001
##   1       32      1.827599  0.4075625  1.315001
##   1       33      1.827599  0.4075625  1.315001
##   1       34      1.827599  0.4075625  1.315001
##   1       35      1.827599  0.4075625  1.315001
##   1       36      1.827599  0.4075625  1.315001
##   1       37      1.827599  0.4075625  1.315001
##   1       38      1.827599  0.4075625  1.315001
##   2        2      1.710047  0.2453159  1.379016
##   2        3      1.495344  0.4264676  1.175314
##   2        4      1.521460  0.4386420  1.185581
##   2        5      1.417891  0.4934109  1.129042
##   2        6      1.537525  0.4585037  1.183102
##   2        7      1.577855  0.4420211  1.211962
##   2        8      1.632740  0.4294073  1.234393
##   2        9      1.638082  0.4313794  1.247853
##   2       10      1.651471  0.4414841  1.247159
##   2       11      1.753376  0.3950680  1.308549
##   2       12      1.799449  0.3888504  1.331243
##   2       13      1.830554  0.3818888  1.341384
##   2       14      1.849959  0.3692278  1.360607
##   2       15      1.954879  0.3476060  1.409477
##   2       16      1.943804  0.3471390  1.407520
##   2       17      1.974040  0.3422408  1.424330
##   2       18      2.012323  0.3331669  1.454372
##   2       19      2.059219  0.3239627  1.473705
##   2       20      2.080363  0.3146588  1.491976
##   2       21      2.083605  0.3155383  1.493052
##   2       22      2.098307  0.3141344  1.494201
##   2       23      2.128191  0.3091935  1.507331
##   2       24      2.148130  0.3117672  1.515721
##   2       25      2.156205  0.3093130  1.517955
##   2       26      2.155815  0.3104356  1.523452
##   2       27      2.168359  0.3106528  1.531438
##   2       28      2.174273  0.3104658  1.539279
##   2       29      2.207294  0.3015582  1.565373
##   2       30      2.200813  0.3046702  1.564638
##   2       31      2.203162  0.3039354  1.565944
##   2       32      2.203548  0.3028535  1.566915
##   2       33      2.204614  0.3023516  1.568340
##   2       34      2.205578  0.3021943  1.568886
##   2       35      2.205578  0.3021943  1.568886
##   2       36      2.205578  0.3021943  1.568886
##   2       37      2.205578  0.3021943  1.568886
##   2       38      2.205578  0.3021943  1.568886
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 3 and degree = 1.
 postResample(pred = marsPred, obs = tey)
##     RMSE Rsquared      MAE 
## 1.271644 0.534292 1.040254
nn
## Model Averaged Neural Network 
## 
## 116 samples
##  57 predictor
## 
## Pre-processing: centered (56), scaled (56), remove (1) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 116, 116, 116, 116, 116, 116, ... 
## Resampling results across tuning parameters:
## 
##   decay  size  RMSE      Rsquared   MAE     
##   0.00    1    2.199301  0.1217990  1.710041
##   0.00    5    2.030027  0.2704261  1.597465
##   0.00   10    4.382785  0.1542262  3.311450
##   0.01    1    1.820337  0.1812592  1.471841
##   0.01    5    2.117801  0.2201042  1.666371
##   0.01   10    4.539377  0.1199944  3.440216
##   0.10    1    2.428366  0.1630367  2.044005
##   0.10    5    2.128162  0.2359959  1.705267
##   0.10   10    4.484375  0.1433486  3.422183
## 
## Tuning parameter 'bag' was held constant at a value of FALSE
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 1, decay = 0.01 and bag = FALSE.
 postResample(pred = nnPred, obs = tey)
##         RMSE     Rsquared          MAE 
## 6.3221443205 0.0005328759 5.8206778353

(a) Which nonlinear regression model gives the optimal resampling and test set performance?

SVM and MARS offer very similar test-set performance based on RMSE (1.29 and 1.27, respectively), and they are the best-performing models; a tabulated comparison follows below.
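As in Exercise 7.2, the test-set metrics can be collected into one table for easier comparison; a minimal sketch:

## Collect the test-set metrics for all four models into one table.
results <- rbind(
  KNN  = postResample(pred = knnPred,  obs = tey),
  SVM  = postResample(pred = svmPred,  obs = tey),
  MARS = postResample(pred = marsPred, obs = tey),
  NNet = postResample(pred = nnPred,   obs = tey))
results[order(results[, "RMSE"]), ]   # lowest RMSE (best) first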

(b) Which predictors are most important in the optimal nonlinear regression model? Do either the biological or process variables dominate the list? How do the top ten important predictors compare to the top ten predictors from the optimal linear model?

The MARS model lists only ManufacturingProcess32 as important (with ManufacturingProcess09 at zero), which suggests leaning on the SVM model instead. For SVM, ManufacturingProcess13 is the most important predictor, with ManufacturingProcess17, 32, 09, and 36 next in line. The process variables dominate the list, but BiologicalMaterial03, 06, and 12 also appear in the top ten, indicating a reasonably balanced model.

varImp(mars)
## earth variable importance
## 
##                        Overall
## ManufacturingProcess32     100
## ManufacturingProcess09       0
varImp(svm)
## loess r-squared variable importance
## 
##   only 20 most important variables shown (out of 57)
## 
##                        Overall
## ManufacturingProcess13  100.00
## ManufacturingProcess17   97.52
## ManufacturingProcess32   97.31
## ManufacturingProcess09   87.08
## ManufacturingProcess36   72.21
## BiologicalMaterial03     67.92
## BiologicalMaterial06     65.83
## ManufacturingProcess31   64.19
## ManufacturingProcess06   59.23
## BiologicalMaterial12     55.05
## ManufacturingProcess30   53.55
## BiologicalMaterial02     51.49
## BiologicalMaterial09     51.04
## ManufacturingProcess11   50.20
## BiologicalMaterial08     49.05
## BiologicalMaterial04     45.25
## ManufacturingProcess12   37.88
## ManufacturingProcess29   36.81
## ManufacturingProcess18   36.69
## ManufacturingProcess01   36.17
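The question also asks how these top ten compare to the optimal linear model from Exercise 6.3. That model is not refit here, but the overlap could be checked as follows; a sketch only, where plsModel is a hypothetical placeholder name for whatever train object Exercise 6.3 produced:

## Compare the two top-ten lists ('plsModel' is a placeholder object).
top10 <- function(fit) {
  imp <- varImp(fit)$importance
  rownames(imp)[order(imp$Overall, decreasing = TRUE)][1:10]
}
intersect(top10(svm), top10(plsModel))   # predictors common to both lists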

(c) Explore the relationships between the top predictors and the response for the predictors that are unique to the optimal nonlinear regression model. Do these plots reveal intuition about the biological or process predictors and their relationship with yield?

The correlation plot shows that all of the biological materials are closely correlated with one another, which makes sense. There is also a strong correlation between ManufacturingProcess13 and ManufacturingProcess17. The inclusion of several biological materials among the top predictors most likely reflects this shared structure in the data. If whatever is driving the levels of the biological materials could be identified, it could help achieve better yield.

library(dplyr)
library(tidyr)     # for drop_na()
library(corrplot)
v <- ChemicalManufacturingProcess %>%
  drop_na() %>%
  dplyr::select(Yield, ManufacturingProcess13, ManufacturingProcess17,
                ManufacturingProcess32, ManufacturingProcess09,
                ManufacturingProcess36, BiologicalMaterial03,
                BiologicalMaterial06, ManufacturingProcess31,
                ManufacturingProcess06, BiologicalMaterial12)
G <- cor(v)
corrplot(G, method = 'number')
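Scatter plots of these top predictors against Yield give a more direct view of the individual relationships; a minimal sketch with caret's featurePlot():

## Scatter plots (with smoothers) of the top predictors against Yield.
featurePlot(x = v[, -1], y = v$Yield,
            plot = "scatter",
            type = c("p", "smooth"))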