library(mlbench)
set.seed(200)
trainingData <- mlbench.friedman1(200, sd = 1)
## This creates a list with a vector 'y' and a matrix of predictors 'x'.
## We convert the 'x' data from a matrix to a data frame;
## one reason is that this will give the columns names.
trainingData$x <- data.frame(trainingData$x)
library(caret)  # needed here: featurePlot() comes from caret
## Look at the data using featurePlot() or other methods.
featurePlot(trainingData$x, trainingData$y)
## Also simulate a large test set to estimate the true error rate
## with good precision:
testData <- mlbench.friedman1(5000, sd = 1)
testData$x <- data.frame(testData$x)
## Shorthand for the training predictors and response:
trx <- trainingData$x
try <- trainingData$y
## KNN model
knnModel <- train(x = trx,
                  y = try,
                  method = "knn",
                  preProcess = c("center", "scale"),
                  tuneLength = 10)
knnPred <- predict(knnModel, newdata = testData$x)
##SVM Model
svm = train(x = trx,
y = try,
method = "svmRadial",
preProc = c("center", "scale"),
tuneLength = 10)
svmPred <- predict(svm, newdata = testData$x)
## MARS model; tuneGrid supersedes tuneLength, so the grid below is what is searched
grid <- expand.grid(.degree = 1:2, .nprune = 2:38)
mars <- train(x = trx,
              y = try,
              method = "earth",
              tuneGrid = grid,
              preProcess = c("center", "scale"))
marsPred <- predict(mars, newdata = testData$x)
## Neural network (model averaged)
nn <- train(x = trx,
            y = try,
            method = "avNNet",
            preProcess = c("center", "scale"),
            tuneLength = 10,
            linout = TRUE, trace = FALSE,
            maxit = 10)  # only 10 iterations; a larger maxit (e.g. 500) would let the nets converge
nnPred <- predict(nn, newdata = testData$x)
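With all four models fit, their test-set accuracy can be tabulated in one step. This is a minimal sketch, assuming the four objects above; the individual postResample() calls later in this section report the same metrics model by model.
## Collect test-set performance for all four models into one table.
models <- list(KNN = knnModel, SVM = svm, MARS = mars, AvNNet = nn)
perf <- sapply(models, function(m)
  postResample(predict(m, newdata = testData$x), testData$y))
round(t(perf), 3)  # rows: models; columns: RMSE, Rsquared, MAE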
knnModel
## k-Nearest Neighbors
##
## 200 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 3.466085 0.5121775 2.816838
## 7 3.349428 0.5452823 2.727410
## 9 3.264276 0.5785990 2.660026
## 11 3.214216 0.6024244 2.603767
## 13 3.196510 0.6176570 2.591935
## 15 3.184173 0.6305506 2.577482
## 17 3.183130 0.6425367 2.567787
## 19 3.198752 0.6483184 2.592683
## 21 3.188993 0.6611428 2.588787
## 23 3.200458 0.6638353 2.604529
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 17.
postResample(pred = knnPred, obs = testData$y)
## RMSE Rsquared MAE
## 3.2040595 0.6819919 2.5683461
svm
## Support Vector Machines with Radial Basis Function Kernel
##
## 200 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ...
## Resampling results across tuning parameters:
##
## C RMSE Rsquared MAE
## 0.25 2.545335 0.7804647 2.015121
## 0.50 2.319786 0.7965148 1.830009
## 1.00 2.188357 0.8119624 1.726031
## 2.00 2.103655 0.8241314 1.655842
## 4.00 2.066890 0.8294297 1.631062
## 8.00 2.052688 0.8313917 1.623563
## 16.00 2.049883 0.8318288 1.621842
## 32.00 2.049883 0.8318288 1.621842
## 64.00 2.049883 0.8318288 1.621842
## 128.00 2.049883 0.8318288 1.621842
##
## Tuning parameter 'sigma' was held constant at a value of 0.06802164
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.06802164 and C = 16.
postResample(pred = svmPred, obs = testData$y)
## RMSE Rsquared MAE
## 2.0864652 0.8236735 1.5854649
mars
## Multivariate Adaptive Regression Spline
##
## 200 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ...
## Resampling results across tuning parameters:
##
## degree nprune RMSE Rsquared MAE
## 1 2 4.447386 0.2254125 3.620675
## 1 3 3.790305 0.4344625 3.058704
## 1 4 2.801182 0.6884819 2.233531
## 1 5 2.551283 0.7412626 2.051644
## 1 6 2.493135 0.7492201 1.986528
## 1 7 2.089713 0.8239588 1.645996
## 1 8 1.889475 0.8565881 1.484798
## 1 9 1.816053 0.8673608 1.420333
## 1 10 1.819611 0.8674028 1.417343
## 1 11 1.819783 0.8670556 1.415058
## 1 12 1.832487 0.8651613 1.426371
## 1 13 1.845943 0.8632112 1.436005
## 1 14 1.855353 0.8613778 1.452115
## 1 15 1.854557 0.8617322 1.452920
## 1 16 1.856173 0.8616879 1.455393
## 1 17 1.856989 0.8615480 1.456862
## 1 18 1.856989 0.8615480 1.456862
## 1 19 1.856989 0.8615480 1.456862
## 1 20 1.856989 0.8615480 1.456862
## 1 21 1.856989 0.8615480 1.456862
## 1 22 1.856989 0.8615480 1.456862
## 1 23 1.856989 0.8615480 1.456862
## 1 24 1.856989 0.8615480 1.456862
## 1 25 1.856989 0.8615480 1.456862
## 1 26 1.856989 0.8615480 1.456862
## 1 27 1.856989 0.8615480 1.456862
## 1 28 1.856989 0.8615480 1.456862
## 1 29 1.856989 0.8615480 1.456862
## 1 30 1.856989 0.8615480 1.456862
## 1 31 1.856989 0.8615480 1.456862
## 1 32 1.856989 0.8615480 1.456862
## 1 33 1.856989 0.8615480 1.456862
## 1 34 1.856989 0.8615480 1.456862
## 1 35 1.856989 0.8615480 1.456862
## 1 36 1.856989 0.8615480 1.456862
## 1 37 1.856989 0.8615480 1.456862
## 1 38 1.856989 0.8615480 1.456862
## 2 2 4.434592 0.2241213 3.616685
## 2 3 3.799538 0.4319047 3.064845
## 2 4 2.806374 0.6871266 2.237911
## 2 5 2.524002 0.7462965 2.023657
## 2 6 2.446243 0.7602514 1.931404
## 2 7 2.147529 0.8127597 1.682839
## 2 8 1.977186 0.8393569 1.557609
## 2 9 1.831267 0.8635192 1.428370
## 2 10 1.639428 0.8902850 1.280510
## 2 11 1.545708 0.9019039 1.213559
## 2 12 1.499558 0.9081641 1.171249
## 2 13 1.494111 0.9087340 1.161702
## 2 14 1.492700 0.9102980 1.160345
## 2 15 1.484444 0.9116520 1.153052
## 2 16 1.487065 0.9109633 1.151057
## 2 17 1.496021 0.9098876 1.156630
## 2 18 1.487296 0.9111035 1.150491
## 2 19 1.486280 0.9113126 1.149198
## 2 20 1.486280 0.9113126 1.149198
## 2 21 1.486280 0.9113126 1.149198
## 2 22 1.486280 0.9113126 1.149198
## 2 23 1.486280 0.9113126 1.149198
## 2 24 1.486280 0.9113126 1.149198
## 2 25 1.486280 0.9113126 1.149198
## 2 26 1.486280 0.9113126 1.149198
## 2 27 1.486280 0.9113126 1.149198
## 2 28 1.486280 0.9113126 1.149198
## 2 29 1.486280 0.9113126 1.149198
## 2 30 1.486280 0.9113126 1.149198
## 2 31 1.486280 0.9113126 1.149198
## 2 32 1.486280 0.9113126 1.149198
## 2 33 1.486280 0.9113126 1.149198
## 2 34 1.486280 0.9113126 1.149198
## 2 35 1.486280 0.9113126 1.149198
## 2 36 1.486280 0.9113126 1.149198
## 2 37 1.486280 0.9113126 1.149198
## 2 38 1.486280 0.9113126 1.149198
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 15 and degree = 2.
postResample(pred = marsPred, obs = testData$y)
## RMSE Rsquared MAE
## 1.1908806 0.9428866 0.9496858
nn
## Model Averaged Neural Network
##
## 200 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ...
## Resampling results across tuning parameters:
##
## size decay RMSE Rsquared MAE
## 1 0.0000000000 3.128820 0.6320687 2.498811
## 1 0.0001000000 3.107544 0.6370921 2.495407
## 1 0.0002371374 3.163252 0.6149715 2.524956
## 1 0.0005623413 3.123078 0.6287146 2.494660
## 1 0.0013335214 3.141668 0.6375256 2.510705
## 1 0.0031622777 3.130592 0.6368533 2.487958
## 1 0.0074989421 3.083642 0.6431109 2.460352
## 1 0.0177827941 3.109579 0.6300262 2.484535
## 1 0.0421696503 3.162708 0.6289212 2.519191
## 1 0.1000000000 3.094492 0.6421950 2.473164
## 3 0.0000000000 2.648455 0.7251107 2.133916
## 3 0.0001000000 2.679208 0.7206391 2.165041
## 3 0.0002371374 2.706788 0.7147311 2.178912
## 3 0.0005623413 2.762171 0.7013884 2.238492
## 3 0.0013335214 2.660382 0.7234814 2.153105
## 3 0.0031622777 2.681812 0.7207370 2.157336
## 3 0.0074989421 2.674776 0.7157596 2.156645
## 3 0.0177827941 2.729657 0.7104905 2.199836
## 3 0.0421696503 2.631564 0.7283282 2.103279
## 3 0.1000000000 2.696572 0.7136820 2.177114
## 5 0.0000000000 2.922733 0.6759939 2.327976
## 5 0.0001000000 2.853978 0.6908079 2.308763
## 5 0.0002371374 2.895676 0.6765080 2.342618
## 5 0.0005623413 2.880794 0.6782593 2.308731
## 5 0.0013335214 2.934064 0.6713027 2.357009
## 5 0.0031622777 2.960609 0.6663393 2.378958
## 5 0.0074989421 2.867584 0.6795627 2.298606
## 5 0.0177827941 2.961187 0.6543978 2.384218
## 5 0.0421696503 2.920749 0.6680839 2.335376
## 5 0.1000000000 2.890832 0.6838303 2.341878
## 7 0.0000000000 3.015235 0.6559685 2.402779
## 7 0.0001000000 2.910304 0.6765894 2.346044
## 7 0.0002371374 2.842701 0.6945078 2.286811
## 7 0.0005623413 2.994514 0.6571072 2.388802
## 7 0.0013335214 2.796730 0.6934224 2.251695
## 7 0.0031622777 3.033221 0.6585045 2.431778
## 7 0.0074989421 2.899710 0.6864625 2.335690
## 7 0.0177827941 2.983988 0.6701486 2.393009
## 7 0.0421696503 2.850925 0.6884874 2.292561
## 7 0.1000000000 2.967903 0.6583638 2.378423
## 9 0.0000000000 2.614224 0.7365018 2.048382
## 9 0.0001000000 2.608856 0.7332699 2.063064
## 9 0.0002371374 2.568944 0.7402739 2.019396
## 9 0.0005623413 2.614273 0.7320823 2.076642
## 9 0.0013335214 2.628447 0.7284600 2.047091
## 9 0.0031622777 2.605874 0.7378485 2.062779
## 9 0.0074989421 2.618953 0.7319339 2.089107
## 9 0.0177827941 2.570207 0.7416047 2.030899
## 9 0.0421696503 2.568533 0.7423609 2.035018
## 9 0.1000000000 2.576620 0.7391354 2.033585
## 11 0.0000000000 2.511344 0.7530407 1.961509
## 11 0.0001000000 2.497001 0.7553022 1.966422
## 11 0.0002371374 2.500386 0.7536464 1.947595
## 11 0.0005623413 2.527625 0.7492405 1.978398
## 11 0.0013335214 2.527984 0.7495224 1.979065
## 11 0.0031622777 2.520052 0.7485472 1.969125
## 11 0.0074989421 2.518164 0.7512021 1.961448
## 11 0.0177827941 2.525764 0.7488841 1.968072
## 11 0.0421696503 2.501572 0.7545813 1.951691
## 11 0.1000000000 2.504610 0.7545259 1.951779
## 13 0.0000000000 2.503232 0.7537019 1.961522
## 13 0.0001000000 2.497599 0.7537725 1.959738
## 13 0.0002371374 2.486734 0.7574174 1.953242
## 13 0.0005623413 2.493780 0.7575011 1.944466
## 13 0.0013335214 2.485324 0.7583431 1.949699
## 13 0.0031622777 2.500245 0.7547206 1.958415
## 13 0.0074989421 2.492082 0.7547764 1.952203
## 13 0.0177827941 2.492416 0.7551457 1.957591
## 13 0.0421696503 2.464628 0.7592132 1.928874
## 13 0.1000000000 2.455520 0.7625533 1.910475
## 15 0.0000000000 2.483624 0.7589583 1.951348
## 15 0.0001000000 2.518595 0.7524384 1.973049
## 15 0.0002371374 2.492682 0.7566005 1.959836
## 15 0.0005623413 2.526795 0.7501101 1.986905
## 15 0.0013335214 2.525187 0.7513776 1.986068
## 15 0.0031622777 2.484207 0.7583813 1.954868
## 15 0.0074989421 2.528672 0.7493950 2.001492
## 15 0.0177827941 2.482741 0.7548608 1.961780
## 15 0.0421696503 2.489621 0.7578285 1.961323
## 15 0.1000000000 2.481894 0.7581087 1.949783
## 17 0.0000000000 2.509274 0.7545777 1.965533
## 17 0.0001000000 2.509781 0.7550460 1.970905
## 17 0.0002371374 2.521266 0.7533134 1.975144
## 17 0.0005623413 2.516596 0.7522336 1.975194
## 17 0.0013335214 2.496458 0.7564561 1.957884
## 17 0.0031622777 2.480115 0.7589945 1.953981
## 17 0.0074989421 2.494060 0.7575602 1.950211
## 17 0.0177827941 2.522617 0.7516227 1.980314
## 17 0.0421696503 2.489164 0.7586780 1.951236
## 17 0.1000000000 2.507616 0.7539608 1.965839
## 19 0.0000000000 2.508316 0.7530887 1.969766
## 19 0.0001000000 2.516852 0.7516058 1.972130
## 19 0.0002371374 2.512634 0.7532131 1.986644
## 19 0.0005623413 2.502437 0.7563636 1.970859
## 19 0.0013335214 2.519458 0.7534491 1.977272
## 19 0.0031622777 2.504889 0.7570108 1.974312
## 19 0.0074989421 2.502389 0.7549638 1.967457
## 19 0.0177827941 2.483511 0.7575397 1.962037
## 19 0.0421696503 2.518330 0.7533324 1.974423
## 19 0.1000000000 2.507275 0.7543896 1.974774
##
## Tuning parameter 'bag' was held constant at a value of FALSE
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 13, decay = 0.1 and bag = FALSE.
postResample(pred = nnPred, obs = testData$y)
## RMSE Rsquared MAE
## 2.5243195 0.7435131 1.9586665
varImp(mars)
## earth variable importance
##
## Overall
## X1 100.00
## X4 75.31
## X2 48.86
## X5 15.61
## X3 0.00
MARS easily gave the best test-set performance, with an R-squared of 0.94 and an RMSE of only 1.19. Its variable importance also correctly identified the informative predictors (X1-X5).
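The same ranking can be checked against the bootstrap resamples rather than the single test set. A sketch using caret::resamples(); strictly, the four models should share resampling indices (set via trainControl(index = ...)) for this comparison, so treat the view as indicative.
rs <- resamples(list(KNN = knnModel, SVM = svm, MARS = mars, AvNNet = nn))
summary(rs)                  # resampled RMSE, R-squared, and MAE per model
bwplot(rs, metric = "RMSE")  # lattice is attached with caret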
library(AppliedPredictiveModeling)
data(ChemicalManufacturingProcess)
## Drop rows with any missing values (an alternative is imputation; see the sketch below)
ChemicalManufacturingProcess <- na.omit(ChemicalManufacturingProcess)
set.seed(6354)
partition <- createDataPartition(ChemicalManufacturingProcess[, 1], p = 0.75, list = FALSE)
trx <- ChemicalManufacturingProcess[partition, -1]    # training predictors
try <- ChemicalManufacturingProcess[partition, 1]     # training response (Yield)
tex <- ChemicalManufacturingProcess[-partition, -1]   # test predictors
tey <- ChemicalManufacturingProcess[-partition, 1]    # test response
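na.omit() above throws away every row containing any missing value. An alternative that keeps all rows is caret's built-in imputation; the following is a sketch of that route, not what produced the results below.
data(ChemicalManufacturingProcess)            # fresh copy, NAs intact
rawX <- ChemicalManufacturingProcess[, -1]
pp <- preProcess(rawX, method = "knnImpute")  # knnImpute also centers and scales
fullX <- predict(pp, rawX)                    # complete predictor matrix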
## KNN model
knnModel <- train(x = trx,
                  y = try,
                  method = "knn",
                  preProcess = c("center", "scale", "nzv"),
                  tuneLength = 10)
knnPred <- predict(knnModel, newdata = tex)
## SVM model (radial basis kernel)
svm <- train(x = trx,
             y = try,
             method = "svmRadial",
             preProcess = c("center", "scale", "nzv"),
             tuneLength = 10)
svmPred <- predict(svm, newdata = tex)
## MARS model; tuneGrid again supersedes tuneLength
grid <- expand.grid(.degree = 1:2, .nprune = 2:38)
mars <- train(x = trx,
              y = try,
              method = "earth",
              tuneGrid = grid,
              preProcess = c("center", "scale", "nzv"))
marsPred <- predict(mars, newdata = tex)
## Neural network (model averaged)
nn <- train(x = trx,
            y = try,
            method = "avNNet",
            tuneGrid = expand.grid(decay = c(0, 0.01, 0.1),
                                   size = c(1, 5, 10),
                                   bag = FALSE),
            preProcess = c("center", "scale", "nzv"),
            linout = TRUE, trace = FALSE,
            maxit = 10)  # only 10 iterations; the weak fit below partly reflects this
nnPred <- predict(nn, newdata = tex)
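As before, held-out performance for all four chemical-yield models can be gathered into one table. A sketch; the per-model postResample() calls below show the same numbers.
models2 <- list(KNN = knnModel, SVM = svm, MARS = mars, AvNNet = nn)
perf2 <- sapply(models2, function(m)
  postResample(predict(m, newdata = tex), tey))
round(t(perf2), 3)  # rows: models; columns: RMSE, Rsquared, MAE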
knnModel
## k-Nearest Neighbors
##
## 116 samples
## 57 predictor
##
## Pre-processing: centered (56), scaled (56), remove (1)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 116, 116, 116, 116, 116, 116, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 1.464010 0.4673338 1.139585
## 7 1.477998 0.4625326 1.175028
## 9 1.490020 0.4636216 1.200355
## 11 1.510578 0.4509582 1.217519
## 13 1.519028 0.4522871 1.230534
## 15 1.533386 0.4506921 1.241647
## 17 1.543198 0.4495521 1.244545
## 19 1.553834 0.4536583 1.248100
## 21 1.566911 0.4481065 1.258900
## 23 1.576190 0.4458291 1.268532
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 5.
postResample(pred = knnPred, obs = tey)
## RMSE Rsquared MAE
## 1.325821 0.490956 1.113889
svm
## Support Vector Machines with Radial Basis Function Kernel
##
## 116 samples
## 57 predictor
##
## Pre-processing: centered (56), scaled (56), remove (1)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 116, 116, 116, 116, 116, 116, ...
## Resampling results across tuning parameters:
##
## C RMSE Rsquared MAE
## 0.25 1.419957 0.4796030 1.1454965
## 0.50 1.306286 0.5285395 1.0528488
## 1.00 1.227004 0.5680746 0.9794792
## 2.00 1.177241 0.5936343 0.9307681
## 4.00 1.162746 0.5979497 0.9109044
## 8.00 1.159309 0.5996155 0.9077492
## 16.00 1.159309 0.5996155 0.9077492
## 32.00 1.159309 0.5996155 0.9077492
## 64.00 1.159309 0.5996155 0.9077492
## 128.00 1.159309 0.5996155 0.9077492
##
## Tuning parameter 'sigma' was held constant at a value of 0.01140901
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.01140901 and C = 8.
postResample(pred = svmPred, obs = tey)
## RMSE Rsquared MAE
## 1.2943112 0.5358659 1.0293632
mars
## Multivariate Adaptive Regression Spline
##
## 116 samples
## 57 predictor
##
## Pre-processing: centered (56), scaled (56), remove (1)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 116, 116, 116, 116, 116, 116, ...
## Resampling results across tuning parameters:
##
## degree nprune RMSE Rsquared MAE
## 1 2 1.707440 0.2496511 1.376532
## 1 3 1.388959 0.4901480 1.094787
## 1 4 1.444912 0.4902384 1.118930
## 1 5 1.498006 0.4694278 1.151446
## 1 6 1.528519 0.4587299 1.171904
## 1 7 1.576584 0.4460382 1.189386
## 1 8 1.617268 0.4277323 1.211580
## 1 9 1.638462 0.4308237 1.223718
## 1 10 1.669968 0.4221263 1.243401
## 1 11 1.675764 0.4182999 1.252501
## 1 12 1.706199 0.4162498 1.261994
## 1 13 1.694483 0.4248081 1.264487
## 1 14 1.715165 0.4212555 1.274582
## 1 15 1.725466 0.4200319 1.270540
## 1 16 1.824161 0.4063203 1.312025
## 1 17 1.820779 0.4085712 1.311544
## 1 18 1.831965 0.4053512 1.316651
## 1 19 1.832663 0.4065130 1.318522
## 1 20 1.827386 0.4078707 1.314769
## 1 21 1.827599 0.4075625 1.315001
## 1 22 1.827599 0.4075625 1.315001
## 1 23 1.827599 0.4075625 1.315001
## 1 24 1.827599 0.4075625 1.315001
## 1 25 1.827599 0.4075625 1.315001
## 1 26 1.827599 0.4075625 1.315001
## 1 27 1.827599 0.4075625 1.315001
## 1 28 1.827599 0.4075625 1.315001
## 1 29 1.827599 0.4075625 1.315001
## 1 30 1.827599 0.4075625 1.315001
## 1 31 1.827599 0.4075625 1.315001
## 1 32 1.827599 0.4075625 1.315001
## 1 33 1.827599 0.4075625 1.315001
## 1 34 1.827599 0.4075625 1.315001
## 1 35 1.827599 0.4075625 1.315001
## 1 36 1.827599 0.4075625 1.315001
## 1 37 1.827599 0.4075625 1.315001
## 1 38 1.827599 0.4075625 1.315001
## 2 2 1.710047 0.2453159 1.379016
## 2 3 1.495344 0.4264676 1.175314
## 2 4 1.521460 0.4386420 1.185581
## 2 5 1.417891 0.4934109 1.129042
## 2 6 1.537525 0.4585037 1.183102
## 2 7 1.577855 0.4420211 1.211962
## 2 8 1.632740 0.4294073 1.234393
## 2 9 1.638082 0.4313794 1.247853
## 2 10 1.651471 0.4414841 1.247159
## 2 11 1.753376 0.3950680 1.308549
## 2 12 1.799449 0.3888504 1.331243
## 2 13 1.830554 0.3818888 1.341384
## 2 14 1.849959 0.3692278 1.360607
## 2 15 1.954879 0.3476060 1.409477
## 2 16 1.943804 0.3471390 1.407520
## 2 17 1.974040 0.3422408 1.424330
## 2 18 2.012323 0.3331669 1.454372
## 2 19 2.059219 0.3239627 1.473705
## 2 20 2.080363 0.3146588 1.491976
## 2 21 2.083605 0.3155383 1.493052
## 2 22 2.098307 0.3141344 1.494201
## 2 23 2.128191 0.3091935 1.507331
## 2 24 2.148130 0.3117672 1.515721
## 2 25 2.156205 0.3093130 1.517955
## 2 26 2.155815 0.3104356 1.523452
## 2 27 2.168359 0.3106528 1.531438
## 2 28 2.174273 0.3104658 1.539279
## 2 29 2.207294 0.3015582 1.565373
## 2 30 2.200813 0.3046702 1.564638
## 2 31 2.203162 0.3039354 1.565944
## 2 32 2.203548 0.3028535 1.566915
## 2 33 2.204614 0.3023516 1.568340
## 2 34 2.205578 0.3021943 1.568886
## 2 35 2.205578 0.3021943 1.568886
## 2 36 2.205578 0.3021943 1.568886
## 2 37 2.205578 0.3021943 1.568886
## 2 38 2.205578 0.3021943 1.568886
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 3 and degree = 1.
postResample(pred = marsPred, obs = tey)
## RMSE Rsquared MAE
## 1.271644 0.534292 1.040254
nn
## Model Averaged Neural Network
##
## 116 samples
## 57 predictor
##
## Pre-processing: centered (56), scaled (56), remove (1)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 116, 116, 116, 116, 116, 116, ...
## Resampling results across tuning parameters:
##
## decay size RMSE Rsquared MAE
## 0.00 1 2.199301 0.1217990 1.710041
## 0.00 5 2.030027 0.2704261 1.597465
## 0.00 10 4.382785 0.1542262 3.311450
## 0.01 1 1.820337 0.1812592 1.471841
## 0.01 5 2.117801 0.2201042 1.666371
## 0.01 10 4.539377 0.1199944 3.440216
## 0.10 1 2.428366 0.1630367 2.044005
## 0.10 5 2.128162 0.2359959 1.705267
## 0.10 10 4.484375 0.1433486 3.422183
##
## Tuning parameter 'bag' was held constant at a value of FALSE
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 1, decay = 0.01 and bag = FALSE.
postResample(pred = nnPred, obs = tey)
## RMSE Rsquared MAE
## 6.3221443205 0.0005328759 5.8206778353
SVM and MARS offer very similar test-set performance (RMSE of 1.29 and 1.27, respectively) and are the two strongest models here.
The MARS model retains only ManufacturingProcess32 as important, which suggests leaning toward the SVM. For the SVM, ManufacturingProcess13 is the most important predictor, with ManufacturingProcess17, 32, 09, and 36 next in line. Its top ten also includes BiologicalMaterial03, 06, and 12, indicating a more balanced model.
varImp(mars)
## earth variable importance
##
## Overall
## ManufacturingProcess32 100
## ManufacturingProcess09 0
varImp(svm)
## loess r-squared variable importance
##
## only 20 most important variables shown (out of 57)
##
## Overall
## ManufacturingProcess13 100.00
## ManufacturingProcess17 97.52
## ManufacturingProcess32 97.31
## ManufacturingProcess09 87.08
## ManufacturingProcess36 72.21
## BiologicalMaterial03 67.92
## BiologicalMaterial06 65.83
## ManufacturingProcess31 64.19
## ManufacturingProcess06 59.23
## BiologicalMaterial12 55.05
## ManufacturingProcess30 53.55
## BiologicalMaterial02 51.49
## BiologicalMaterial09 51.04
## ManufacturingProcess11 50.20
## BiologicalMaterial08 49.05
## BiologicalMaterial04 45.25
## ManufacturingProcess12 37.88
## ManufacturingProcess29 36.81
## ManufacturingProcess18 36.69
## ManufacturingProcess01 36.17
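The importance gap is easier to see in a plot than in the printed ranking; a quick sketch using caret's plot method for varImp objects:
plot(varImp(svm), top = 10)  # dotplot of the ten strongest predictors
plot(varImp(mars))           # MARS keeps essentially one predictor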
The correlation plot below shows that the biological materials are all closely correlated with one another, which makes sense. There is also a strong correlation between ManufacturingProcess13 and ManufacturingProcess17. The appearance of the biological materials among the top predictors most likely reflects this shared structure: if whatever drives the levels of the biological materials can be identified, it could point the way to better yield.
library(dplyr)     # for %>% and select()
library(tidyr)     # for drop_na()
library(corrplot)
v <- ChemicalManufacturingProcess %>%
  drop_na() %>%
  dplyr::select(Yield, ManufacturingProcess13, ManufacturingProcess17,
                ManufacturingProcess32, ManufacturingProcess09,
                ManufacturingProcess36, BiologicalMaterial03,
                BiologicalMaterial06, ManufacturingProcess31,
                ManufacturingProcess06, BiologicalMaterial12)
G <- cor(v)
corrplot(G, method = "number")
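To read exact values out of the plot, the strongest pairwise correlations can be listed directly; a sketch built from the matrix G above:
cc <- as.data.frame(as.table(G))  # long form: Var1, Var2, Freq
names(cc) <- c("var1", "var2", "cor")
cc <- subset(cc, as.character(var1) < as.character(var2))  # drop self and duplicate pairs
head(cc[order(-abs(cc$cor)), ], 10)                        # ten strongest relationships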