7.2

library(mlbench)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(earth)
## Loading required package: Formula
## Loading required package: plotmo
## Loading required package: plotrix
## Loading required package: TeachingDemos
# Simulate the Friedman 1 benchmark data; the fixed seed makes the draws
# repeatable.
set.seed(284781)
training <- mlbench.friedman1(n = 200, sd = 1)
# Store the predictor matrix as a data frame so that its columns get names.
training[["x"]] <- data.frame(training[["x"]])
# Plot each predictor against the response for a quick visual check.
featurePlot(x = training[["x"]], y = training[["y"]])

# A larger test set drawn from the same generator.
test <- mlbench.friedman1(n = 5000, sd = 1)
test[["x"]] <- data.frame(test[["x"]])

KNN

# Tune k-nearest neighbours on centred and scaled predictors;
# tuneLength = 10 requests ten candidate values of k.
knn_model <- train(
  x = training$x,
  y = training$y,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10
)
knn_model
## k-Nearest Neighbors 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  3.837233  0.4460483  3.082491
##    7  3.795854  0.4634447  3.070342
##    9  3.748734  0.4853179  3.058327
##   11  3.749681  0.4936896  3.072591
##   13  3.755816  0.5003674  3.077159
##   15  3.744031  0.5179209  3.061309
##   17  3.739528  0.5283752  3.054106
##   19  3.762090  0.5278787  3.077658
##   21  3.754473  0.5422230  3.060255
##   23  3.761631  0.5475558  3.062064
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 17.

KNN Predictions

# Predict the simulated test set with the tuned KNN model.
knn_pred <- predict(knn_model, test$x)

# Test-set RMSE, R-squared and MAE.
postResample(knn_pred, test$y)
##      RMSE  Rsquared       MAE 
## 3.2180029 0.6902624 2.5830817

The KNN test-set RMSE and R-squared serve as a baseline against which the other, more complex model types can be compared.

MARS

# Candidate MARS models: additive (degree 1) and two-way interaction
# (degree 2) fits, each pruned to between 2 and 15 retained terms.
mars_grid <- expand.grid(.degree = 1:2, .nprune = 2:15)

# Tune MARS over the explicit grid. tuneLength is deliberately not passed:
# caret ignores it whenever tuneGrid is supplied, so it would only mislead.
mars_model <- train(
  x = training$x,
  y = training$y,
  method = "earth",
  tuneGrid = mars_grid,
  preProcess = c("center", "scale")
)
mars_model
## Multivariate Adaptive Regression Spline 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ... 
## Resampling results across tuning parameters:
## 
##   degree  nprune  RMSE      Rsquared   MAE     
##   1        2      4.420760  0.2659490  3.734609
##   1        3      3.866596  0.4362895  3.117283
##   1        4      3.332638  0.5795485  2.659700
##   1        5      2.814336  0.7018928  2.207811
##   1        6      2.639572  0.7402360  2.053217
##   1        7      2.363674  0.7881426  1.831327
##   1        8      2.079630  0.8367420  1.616083
##   1        9      1.956392  0.8561370  1.534864
##   1       10      1.893344  0.8652014  1.497770
##   1       11      1.879812  0.8669273  1.486811
##   1       12      1.878365  0.8675629  1.487472
##   1       13      1.912799  0.8625434  1.514213
##   1       14      1.929153  0.8604977  1.525622
##   1       15      1.941622  0.8585217  1.530798
##   2        2      4.431376  0.2633254  3.758500
##   2        3      3.850895  0.4389288  3.113149
##   2        4      3.305752  0.5898894  2.649576
##   2        5      2.872114  0.6896624  2.264406
##   2        6      2.675087  0.7302338  2.080708
##   2        7      2.326861  0.7989202  1.833539
##   2        8      2.090856  0.8342304  1.666143
##   2        9      1.785856  0.8787872  1.410562
##   2       10      1.588257  0.9049035  1.258295
##   2       11      1.508781  0.9153083  1.203985
##   2       12      1.448210  0.9213881  1.153985
##   2       13      1.402000  0.9265610  1.116501
##   2       14      1.397053  0.9267936  1.113223
##   2       15      1.406628  0.9267778  1.116179
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 14 and degree = 2.

MARS Predictions

# Score the tuned MARS model on the simulated test set.
mars_predictions <- predict(mars_model, test$x)
postResample(mars_predictions, test$y)
##      RMSE  Rsquared       MAE 
## 1.3256520 0.9286603 1.0196659

MARS has a much better test-set RMSE than KNN (1.33 vs. 3.22), and its R-squared is also considerably higher (0.93 vs. 0.69).

# Rank the predictors by their importance in the final MARS model.
varImp(object = mars_model)
## earth variable importance
## 
##    Overall
## X4  100.00
## X1   82.21
## X2   67.65
## X5   55.06
## X3   43.03
## X8    0.00

The MARS model has, in fact, selected the informative predictors.

SVM

# Radial-basis SVM tuned with 10-fold cross-validation on centred and
# scaled predictors; tuneLength = 10 requests ten candidate tuning values.
svm_model <- train(
  x = training$x,
  y = training$y,
  method = "svmRadial",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = trainControl(method = "cv", number = 10)
)
svm_model
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ... 
## Resampling results across tuning parameters:
## 
##   C       RMSE      Rsquared   MAE     
##     0.25  3.082187  0.6983577  2.446508
##     0.50  2.765976  0.7373152  2.158110
##     1.00  2.565931  0.7673541  1.987885
##     2.00  2.339705  0.8060278  1.836148
##     4.00  2.239696  0.8219310  1.742172
##     8.00  2.209119  0.8288286  1.752601
##    16.00  2.242573  0.8242354  1.785742
##    32.00  2.245518  0.8238255  1.787816
##    64.00  2.245518  0.8238255  1.787816
##   128.00  2.245518  0.8238255  1.787816
## 
## Tuning parameter 'sigma' was held constant at a value of 0.06452199
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.06452199 and C = 8.

SVM Predictions

# Score the tuned SVM on the simulated test set.
svm_predictions <- predict(svm_model, test$x)
postResample(svm_predictions, test$y)
##      RMSE  Rsquared       MAE 
## 2.0925447 0.8239415 1.6451044

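The SVM model is worse than MARS on both test-set RMSE (2.09 vs. 1.33) and R-squared (0.82 vs. 0.93), although it clearly improves on the KNN baseline (RMSE 3.22, R-squared 0.69). So far, MARS seems to be the top performing model.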

Neural Network

# Tuning grid for the averaged neural network: five weight-decay values
# crossed with four hidden-layer sizes, with bagging switched off.
nnet_grid <- expand.grid(
  .decay = c(0, 0.01, 0.1, 0.5, 0.9),
  .size = c(1, 10, 15, 20),
  .bag = FALSE
)

# trace, linout and maxit are not train() arguments; they are forwarded
# through `...` to the underlying network fit.
nnet_model <- train(
  x = training$x,
  y = training$y,
  method = "avNNet",
  tuneGrid = nnet_grid,
  preProcess = c("center", "scale"),
  trace = FALSE,
  linout = TRUE,
  maxit = 500
)
## Warning: executing %dopar% sequentially: no parallel backend registered
nnet_model
## Model Averaged Neural Network 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ... 
## Resampling results across tuning parameters:
## 
##   decay  size  RMSE      Rsquared   MAE     
##   0.00    1    3.213027  0.6009585  2.503680
##   0.00   10    3.952337  0.5279348  2.877521
##   0.00   15    3.179907  0.6193990  2.503094
##   0.00   20    2.995475  0.6537507  2.342278
##   0.01    1    3.193652  0.6085512  2.468448
##   0.01   10    3.184978  0.6217268  2.507903
##   0.01   15    2.898531  0.6759028  2.292387
##   0.01   20    2.594368  0.7349180  2.028543
##   0.10    1    3.145068  0.6183100  2.406646
##   0.10   10    3.062719  0.6416988  2.396324
##   0.10   15    2.666857  0.7195360  2.095962
##   0.10   20    2.464999  0.7594462  1.902376
##   0.50    1    3.157788  0.6150242  2.422322
##   0.50   10    2.744122  0.7056143  2.134140
##   0.50   15    2.449158  0.7629989  1.910419
##   0.50   20    2.424859  0.7675369  1.881082
##   0.90    1    3.172773  0.6109898  2.443381
##   0.90   10    2.630401  0.7276318  2.043081
##   0.90   15    2.435435  0.7655483  1.894194
##   0.90   20    2.387742  0.7744201  1.848050
## 
## Tuning parameter 'bag' was held constant at a value of FALSE
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 20, decay = 0.9 and bag = FALSE.

NNet Predictions

# Score the tuned neural network on the simulated test set.
nnet_pred <- predict(nnet_model, test$x)
postResample(nnet_pred, test$y)
##      RMSE  Rsquared       MAE 
## 1.7659827 0.8737405 1.3968758

The neural network had the second best RMSE but it looks like MARS performed the best, based on RMSE and R-squared, for this particular data sample.

7.5

We will load the same data as exercise 6.3 and pre-process it the same way.

library(mice)
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
## 
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
## 
##     sleep
library(AppliedPredictiveModeling)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the chemical manufacturing data set.
data("ChemicalManufacturingProcess")

# Tabulate the missing-data patterns across all columns.
md.pattern(x = ChemicalManufacturingProcess)

##     Yield BiologicalMaterial01 BiologicalMaterial02 BiologicalMaterial03
## 152     1                    1                    1                    1
## 6       1                    1                    1                    1
## 1       1                    1                    1                    1
## 7       1                    1                    1                    1
## 5       1                    1                    1                    1
## 2       1                    1                    1                    1
## 1       1                    1                    1                    1
## 1       1                    1                    1                    1
## 1       1                    1                    1                    1
##         0                    0                    0                    0
##     BiologicalMaterial04 BiologicalMaterial05 BiologicalMaterial06
## 152                    1                    1                    1
## 6                      1                    1                    1
## 1                      1                    1                    1
## 7                      1                    1                    1
## 5                      1                    1                    1
## 2                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
##                        0                    0                    0
##     BiologicalMaterial07 BiologicalMaterial08 BiologicalMaterial09
## 152                    1                    1                    1
## 6                      1                    1                    1
## 1                      1                    1                    1
## 7                      1                    1                    1
## 5                      1                    1                    1
## 2                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
##                        0                    0                    0
##     BiologicalMaterial10 BiologicalMaterial11 BiologicalMaterial12
## 152                    1                    1                    1
## 6                      1                    1                    1
## 1                      1                    1                    1
## 7                      1                    1                    1
## 5                      1                    1                    1
## 2                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
## 1                      1                    1                    1
##                        0                    0                    0
##     ManufacturingProcess09 ManufacturingProcess13 ManufacturingProcess15
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess16 ManufacturingProcess17 ManufacturingProcess18
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess19 ManufacturingProcess20 ManufacturingProcess21
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess32 ManufacturingProcess37 ManufacturingProcess38
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess39 ManufacturingProcess42 ManufacturingProcess43
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          0                      0                      0
##     ManufacturingProcess44 ManufacturingProcess45 ManufacturingProcess01
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      0
##                          0                      0                      1
##     ManufacturingProcess04 ManufacturingProcess05 ManufacturingProcess07
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        0                      0                      0
##                          1                      1                      1
##     ManufacturingProcess08 ManufacturingProcess12 ManufacturingProcess14
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      0
## 1                        0                      0                      1
##                          1                      1                      1
##     ManufacturingProcess22 ManufacturingProcess23 ManufacturingProcess24
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        0                      0                      0
##                          1                      1                      1
##     ManufacturingProcess40 ManufacturingProcess41 ManufacturingProcess06
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      1                      1
## 2                        1                      1                      1
## 1                        1                      1                      0
## 1                        1                      1                      1
## 1                        0                      0                      0
##                          1                      1                      2
##     ManufacturingProcess02 ManufacturingProcess25 ManufacturingProcess26
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        1                      0                      0
## 2                        0                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        0                      1                      1
##                          3                      5                      5
##     ManufacturingProcess27 ManufacturingProcess28 ManufacturingProcess29
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        0                      0                      0
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          5                      5                      5
##     ManufacturingProcess30 ManufacturingProcess31 ManufacturingProcess33
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        0                      0                      0
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          5                      5                      5
##     ManufacturingProcess34 ManufacturingProcess35 ManufacturingProcess36
## 152                      1                      1                      1
## 6                        1                      1                      1
## 1                        1                      1                      1
## 7                        1                      1                      1
## 5                        0                      0                      0
## 2                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
## 1                        1                      1                      1
##                          5                      5                      5
##     ManufacturingProcess10 ManufacturingProcess11 ManufacturingProcess03    
## 152                      1                      1                      1   0
## 6                        1                      1                      0   1
## 1                        1                      0                      1   1
## 7                        0                      0                      0   3
## 5                        1                      1                      1  11
## 2                        1                      1                      1   1
## 1                        1                      1                      1   1
## 1                        0                      0                      0   4
## 1                        0                      0                      0  16
##                          9                     10                     15 106
# Impute missing values with VIM's k-nearest-neighbour imputation;
# imp_var = FALSE leaves out the extra imputation-indicator columns.
cmp_df <- kNN(ChemicalManufacturingProcess, imp_var = FALSE)

# Drop near-zero-variance columns. The empty case must be guarded:
# cmp_df[, -integer(0)] would select zero columns instead of all of them.
zeroVar <- nearZeroVar(cmp_df)
cmp_df_final <- if (length(zeroVar) > 0) {
  cmp_df[, -zeroVar, drop = FALSE]
} else {
  cmp_df
}

# Hold out 20% of the rows, partitioning on the response.
part <- createDataPartition(
  y = ChemicalManufacturingProcess$Yield,
  p = 0.8,
  list = FALSE,
  times = 1
)

# cmp_df_final still contains Yield (it is needed for the plots later on),
# so the response is excluded from the predictor sets here; otherwise every
# model could simply read the answer off its inputs.
predictor_cols <- setdiff(names(cmp_df_final), "Yield")
x_train <- cmp_df_final[part, predictor_cols, drop = FALSE]
x_test <- cmp_df_final[-part, predictor_cols, drop = FALSE]

y_train <- ChemicalManufacturingProcess$Yield[part]
y_test <- ChemicalManufacturingProcess$Yield[-part]

Now we will experiment with the same models as the previous question, starting with KNN.

KNN

# Tune k-nearest neighbours on the centred and scaled training predictors;
# tuneLength = 10 requests ten candidate values of k.
knn_model <- train(
  x = x_train,
  y = y_train,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10
)
knn_model
## k-Nearest Neighbors 
## 
## 144 samples
##  57 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 144, 144, 144, 144, 144, 144, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE      
##    5  1.265975  0.5452400  0.9902709
##    7  1.265480  0.5547005  1.0085555
##    9  1.272697  0.5591807  1.0135258
##   11  1.272022  0.5704851  1.0187692
##   13  1.275673  0.5717302  1.0199876
##   15  1.284043  0.5708050  1.0268790
##   17  1.293110  0.5666238  1.0331613
##   19  1.306244  0.5615143  1.0432799
##   21  1.323831  0.5510852  1.0594225
##   23  1.334774  0.5485627  1.0683099
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 7.

KNN Predictions

# Predict the held-out rows with the tuned KNN model.
knn_pred <- predict(knn_model, x_test)

# Hold-out RMSE, R-squared and MAE.
postResample(knn_pred, y_test)
##      RMSE  Rsquared       MAE 
## 1.1013208 0.6555528 0.8938393

Again we have KNN as a baseline. R-squared looks like it could be improved significantly.

MARS

# Candidate MARS models: degree 1 or 2, pruned to between 2 and 10 terms.
mars_grid <- expand.grid(.degree = 1:2, .nprune = 2:10)

# Tune MARS over the explicit grid on centred and scaled predictors.
mars_model <- train(
  x = x_train,
  y = y_train,
  method = "earth",
  tuneGrid = mars_grid,
  preProcess = c("center", "scale")
)

mars_model
## Multivariate Adaptive Regression Spline 
## 
## 144 samples
##  57 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 144, 144, 144, 144, 144, 144, ... 
## Resampling results across tuning parameters:
## 
##   degree  nprune  RMSE          Rsquared  MAE        
##   1        2      1.463033e-14  1         1.44855e-14
##   1        3      1.463033e-14  1         1.44855e-14
##   1        4      1.463033e-14  1         1.44855e-14
##   1        5      1.463033e-14  1         1.44855e-14
##   1        6      1.463033e-14  1         1.44855e-14
##   1        7      1.463033e-14  1         1.44855e-14
##   1        8      1.463033e-14  1         1.44855e-14
##   1        9      1.463033e-14  1         1.44855e-14
##   1       10      1.463033e-14  1         1.44855e-14
##   2        2      1.463033e-14  1         1.44855e-14
##   2        3      1.463033e-14  1         1.44855e-14
##   2        4      1.463033e-14  1         1.44855e-14
##   2        5      1.463033e-14  1         1.44855e-14
##   2        6      1.463033e-14  1         1.44855e-14
##   2        7      1.463033e-14  1         1.44855e-14
##   2        8      1.463033e-14  1         1.44855e-14
##   2        9      1.463033e-14  1         1.44855e-14
##   2       10      1.463033e-14  1         1.44855e-14
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 2 and degree = 1.

MARS Predictions

# Score the tuned MARS model on the held-out rows.
mars_predictions <- predict(mars_model, x_test)
postResample(mars_predictions, y_test)
##         RMSE     Rsquared          MAE 
## 1.432145e-14 1.000000e+00 1.421085e-14

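The MARS results shown above cannot be taken at face value: an RMSE of essentially zero (about 1e-14) and an R-squared of exactly 1 for every tuning combination are not a genuine fit. They are the signature of target leakage – these results were produced with `Yield` itself among the predictors (it also tops the variable-importance list in part B), so the model simply reproduces the response. MARS therefore cannot be compared with the other models on the basis of these numbers.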

SVM

# Radial-basis SVM tuned with 10-fold cross-validation on centred and
# scaled predictors; tuneLength = 10 requests ten candidate tuning values.
svm_model <- train(
  x = x_train,
  y = y_train,
  method = "svmRadial",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = trainControl(method = "cv", number = 10)
)
svm_model
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 144 samples
##  57 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 131, 132, 129, 129, 130, 128, ... 
## Resampling results across tuning parameters:
## 
##   C       RMSE       Rsquared   MAE      
##     0.25  1.1697520  0.7125480  0.9361767
##     0.50  0.9465636  0.8071920  0.7394489
##     1.00  0.7653627  0.8640640  0.5905697
##     2.00  0.6868656  0.8852735  0.5231221
##     4.00  0.6776011  0.8876721  0.5181494
##     8.00  0.6776011  0.8876721  0.5181494
##    16.00  0.6776011  0.8876721  0.5181494
##    32.00  0.6776011  0.8876721  0.5181494
##    64.00  0.6776011  0.8876721  0.5181494
##   128.00  0.6776011  0.8876721  0.5181494
## 
## Tuning parameter 'sigma' was held constant at a value of 0.0147849
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.0147849 and C = 4.

SVM Predictions

# Score the tuned SVM on the held-out rows.
svm_predictions <- predict(svm_model, x_test)
postResample(svm_predictions, y_test)
##      RMSE  Rsquared       MAE 
## 0.5392078 0.9264314 0.4392968

SVM has the lowest credible RMSE so far (0.54) and a high R-squared – approximately 93%.

Neural Network

# Tuning grid for the averaged neural network: three weight-decay values
# crossed with three hidden-layer sizes, with bagging switched off.
nnet_grid <- expand.grid(
  .decay = c(0, 0.01, 0.1),
  .size = c(1, 5, 10),
  .bag = FALSE
)

# trace, linout and maxit are not train() arguments; they are forwarded
# through `...` to the underlying network fit.
nnet_model <- train(
  x = x_train,
  y = y_train,
  method = "avNNet",
  tuneGrid = nnet_grid,
  preProcess = c("center", "scale"),
  trace = FALSE,
  linout = TRUE,
  maxit = 500
)

nnet_model
## Model Averaged Neural Network 
## 
## 144 samples
##  57 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 144, 144, 144, 144, 144, 144, ... 
## Resampling results across tuning parameters:
## 
##   decay  size  RMSE       Rsquared   MAE      
##   0.00    1    1.5034445  0.4121909  1.2083307
##   0.00    5    2.1846413  0.4065370  1.6802527
##   0.00   10    9.1835472  0.1217374  5.9462823
##   0.01    1    0.5351704  0.8971591  0.2181828
##   0.01    5    1.4501439  0.5532420  1.0262482
##   0.01   10    2.1020990  0.4737367  1.5980706
##   0.10    1    0.6731660  0.8690445  0.3851554
##   0.10    5    1.7664417  0.5057023  0.9525642
##   0.10   10    1.3177992  0.5934042  0.9650665
## 
## Tuning parameter 'bag' was held constant at a value of FALSE
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 1, decay = 0.01 and bag = FALSE.

NNet Predictions

# Score the tuned neural network on the held-out rows.
nnet_pred <- predict(nnet_model, x_test)
postResample(nnet_pred, y_test)
##      RMSE  Rsquared       MAE 
## 1.0578760 0.7809455 0.2817886

The neural network's hold-out RMSE (1.06) is only slightly better than KNN's (1.10) and well short of SVM's (0.54). Its R-squared (0.78) likewise falls between KNN (0.66) and SVM (0.93).

SVM will be the selected model for these data.

B.

# Variable importance for the selected SVM model.
important_vars <- varImp(object = svm_model)

# Show the ranking.
important_vars
## loess r-squared variable importance
## 
##   only 20 most important variables shown (out of 57)
## 
##                        Overall
## Yield                   100.00
## ManufacturingProcess13   37.43
## ManufacturingProcess32   34.40
## ManufacturingProcess17   31.60
## BiologicalMaterial06     29.07
## ManufacturingProcess09   28.43
## BiologicalMaterial12     28.20
## ManufacturingProcess36   27.51
## ManufacturingProcess31   25.39
## BiologicalMaterial03     24.71
## ManufacturingProcess06   24.58
## BiologicalMaterial02     22.74
## ManufacturingProcess11   21.10
## BiologicalMaterial11     18.96
## ManufacturingProcess33   18.58
## BiologicalMaterial08     16.84
## BiologicalMaterial04     15.86
## ManufacturingProcess30   15.20
## ManufacturingProcess12   15.16
## BiologicalMaterial09     14.32

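Setting aside `Yield` itself – it is the response, and it appears at the top of this list only because it was left among the predictors – manufacturing process variables are the most important, with ManufacturingProcess13, 32, and 17 having the highest values. This was the same in the previous homework.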

C.

# Scatterplots of Yield against each of the three top-ranked
# manufacturing process variables.
ggplot(data = cmp_df_final, mapping = aes(x = ManufacturingProcess13, y = Yield)) +
  geom_point()

ggplot(data = cmp_df_final, mapping = aes(x = ManufacturingProcess32, y = Yield)) +
  geom_point()

ggplot(data = cmp_df_final, mapping = aes(x = ManufacturingProcess17, y = Yield)) +
  geom_point()

Manufacturing Processes 13 and 17 appear to have an inverse relationship with yield, while MP32 has a direct positive relationship.