$y = 10 \sin(\pi x_1 x_2) + 20(x_3 - 0.5)^2 + 10 x_4 + 5 x_5 + N(0, \sigma^2)$ ## where the x values are random variables uniformly distributed on [0, 1] ## (5 other non-informative variables are also created in the simulation). The package mlbench contains a function called mlbench.friedman1 that simulates these data:
# Fix the RNG state so the simulated data sets are reproducible.
set.seed(200)
# install.packages("mlbench")
library(mlbench)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2

# Simulate 200 training rows with noise sd = 1. The `x` element of the result
# is converted to a data frame (its columns are reported as X1, X2, ... in the
# variable-importance output further down).
trainingData <- mlbench.friedman1(n = 200, sd = 1)
trainingData$x <- data.frame(trainingData$x)

# Simulate a separate 5000-row set used only for test-set evaluation.
testData <- mlbench.friedman1(n = 5000, sd = 1)
testData$x <- data.frame(testData$x)

# Plot each training predictor against the response.
featurePlot(x = trainingData$x, y = trainingData$y)
## Tune several models on these data. For example: # KNN
library(caret)
# Tune a k-nearest-neighbours regression on the simulated training data.
# Predictors are centered and scaled; tuneLength = 12 asks caret for 12
# candidate values of k.
knnFit <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 12
)
# Print the resampling results and the selected k.
knnFit
## k-Nearest Neighbors
##
## 200 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 3.466085 0.5121775 2.816838
## 7 3.349428 0.5452823 2.727410
## 9 3.264276 0.5785990 2.660026
## 11 3.214216 0.6024244 2.603767
## 13 3.196510 0.6176570 2.591935
## 15 3.184173 0.6305506 2.577482
## 17 3.183130 0.6425367 2.567787
## 19 3.198752 0.6483184 2.592683
## 21 3.188993 0.6611428 2.588787
## 23 3.200458 0.6638353 2.604529
## 25 3.209660 0.6723668 2.606953
## 27 3.230715 0.6719092 2.620592
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 17.
# Predict the simulated test set with the tuned KNN model.
knnPred <- predict(knnFit, newdata = testData$x)
## The function 'postResample' can be used to get the test set
## performance values
knn_model_stats <- postResample(pred = knnPred, obs = testData$y)
knn_model_stats
## RMSE Rsquared MAE
## 3.2040595 0.6819919 2.5683461
# Pair observed and predicted values. Column names are spelled out (they match
# the names data.frame() would derive) so the plot can map to them directly.
knn_pred_actual <- data.frame(testData.y = testData$y, knnPred = knnPred)
# 2-D binned counts of predicted vs. observed values. The aesthetics refer to
# columns of `knn_pred_actual` instead of `testData$y` and a global vector, so
# the plot is actually driven by the data frame handed to ggplot().
ggplot(knn_pred_actual, aes(x = testData.y, y = knnPred)) +
  geom_bin2d(binwidth = c(0.25, 0.25)) +
  scale_fill_gradientn(
    limits = c(0, 30),
    breaks = seq(0, 40, by = 10),
    colours = rainbow(10)
  )
# SVM
library(caret)
# Tune a radial-basis-function SVM on the simulated training data, with
# centered and scaled predictors and 10 candidate tuning values.
svmFit <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "svmRadial",
  preProcess = c("center", "scale"),
  tuneLength = 10
)
# Test-set predictions from the selected SVM.
svmPred <- predict(svmFit, newdata = testData$x)
# Print the resampling results and the selected parameters.
svmFit
## Support Vector Machines with Radial Basis Function Kernel
##
## 200 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ...
## Resampling results across tuning parameters:
##
## C RMSE Rsquared MAE
## 0.25 2.547516 0.7726615 2.006413
## 0.50 2.320477 0.7917364 1.820893
## 1.00 2.192587 0.8088843 1.722220
## 2.00 2.116056 0.8196188 1.654683
## 4.00 2.069331 0.8263439 1.617767
## 8.00 2.045729 0.8300195 1.603214
## 16.00 2.041974 0.8306705 1.601002
## 32.00 2.041974 0.8306705 1.601002
## 64.00 2.041974 0.8306705 1.601002
## 128.00 2.041974 0.8306705 1.601002
##
## Tuning parameter 'sigma' was held constant at a value of 0.06148849
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.06148849 and C = 16.
## The function 'postResample' can be used to get the test set
## performance values
# Test-set RMSE, R-squared and MAE for the SVM predictions.
svm_model_stats <- postResample(pred = svmPred, obs = testData$y)
svm_model_stats
## RMSE Rsquared MAE
## 2.0700606 0.8262412 1.5723919
# Pair observed and predicted values for plotting; column names are explicit
# and match the names data.frame() would derive.
svm_pred_actual <- data.frame(testData.y = testData$y, svmPred = svmPred)
# 2-D binned counts of predicted vs. observed values. Uses the SVM data frame
# (previously the KNN data frame was passed here by mistake) and maps to its
# columns instead of global vectors.
ggplot(svm_pred_actual, aes(x = testData.y, y = svmPred)) +
  geom_bin2d(binwidth = c(0.25, 0.25)) +
  scale_fill_gradientn(
    limits = c(0, 30),
    breaks = seq(0, 40, by = 10),
    colours = rainbow(10)
  )
library(caret)
# Single-point tuning grid for the model-averaged neural network:
# decay = 0.01, size = 1, bag = FALSE. Other candidates (decay 0, 0.1, 0.5,
# 0.9; size 10, 15, 20) were left out of the grid.
nnetGrid <- expand.grid(
  .decay = 0.01,
  .size = 1,
  .bag = FALSE
)
# Fit the averaged network on centered and scaled predictors. trace, linout
# and maxit are not train() arguments; they are passed on to the model fit.
nnetFit <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "avNNet",
  preProcess = c("center", "scale"),
  tuneGrid = nnetGrid,
  trace = FALSE,
  linout = TRUE,
  maxit = 500
)
## Warning: executing %dopar% sequentially: no parallel backend registered
# Print the resampling results for the single grid point.
nnetFit
## Model Averaged Neural Network
##
## 200 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 2.543819 0.7414866 1.994862
##
## Tuning parameter 'size' was held constant at a value of 1
## Tuning
## parameter 'decay' was held constant at a value of 0.01
## Tuning parameter
## 'bag' was held constant at a value of FALSE
# Predict the simulated test set with the averaged neural network and compute
# test-set RMSE, R-squared and MAE.
nnetPred <- predict(nnetFit, newdata = testData$x)
nnet_model_stats <- postResample(pred = nnetPred, obs = testData$y)
nnet_model_stats
## RMSE Rsquared MAE
## 2.6433128 0.7194467 2.0232365
# Pair observed and predicted values for plotting; column names are explicit
# and match the names data.frame() would derive.
nnet_pred_actual <- data.frame(testData.y = testData$y, nnetPred = nnetPred)
# 2-D binned counts of predicted vs. observed values. Uses the neural-network
# data frame (previously the KNN data frame was passed here by mistake) and
# maps to its columns instead of global vectors.
ggplot(nnet_pred_actual, aes(x = testData.y, y = nnetPred)) +
  geom_bin2d(binwidth = c(0.25, 0.25)) +
  scale_fill_gradientn(
    limits = c(0, 30),
    breaks = seq(0, 40, by = 10),
    colours = rainbow(10)
  )
library(caret)
# Candidate MARS settings: interaction degree 1 or 2 crossed with 2 to 20
# retained terms.
marsGrid <- expand.grid(
  .degree = 1:2,
  .nprune = 2:20
)
# Tune MARS (earth) over that grid on centered and scaled predictors.
marsFit <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "earth",
  preProcess = c("center", "scale"),
  tuneGrid = marsGrid
)
## Loading required package: earth
## Loading required package: Formula
## Loading required package: plotmo
## Loading required package: plotrix
## Loading required package: TeachingDemos
# Predict the simulated test set with the tuned MARS model.
marsPred <- predict(marsFit, newdata = testData$x)
# Variable importance as reported for the earth fit.
varImp(marsFit)
## earth variable importance
##
## Overall
## X1 100.00
## X4 75.31
## X2 48.86
## X5 15.61
## X3 0.00
# Summary of the final (selected) earth model.
marsFit$finalModel
## Selected 15 of 21 terms, and 5 of 10 predictors (nprune=15)
## Termination condition: Reached nk 21
## Importance: X1, X4, X2, X5, X3, X6-unused, X7-unused, X8-unused, X9-unused, ...
## Number of terms at each degree of interaction: 1 10 4
## GCV 1.726398 RSS 232.166 GRSq 0.9299075 RSq 0.952395
## The function 'postResample' can be used to get the test set
## performance values
mars_model_stats <- postResample(pred = marsPred, obs = testData$y)
# Print the test-set statistics, as is done for the other models, so the model
# comparison can be checked against them.
mars_model_stats
# Pair observed and predicted values for plotting. as.vector() flattens the
# predictions in case predict() returned a one-column matrix here.
# NOTE(review): confirm whether earth predictions come back as a matrix.
mars_pred_actual <- data.frame(
  testData.y = testData$y,
  marsPred = as.vector(marsPred)
)
# 2-D binned counts of predicted vs. observed values. Uses the MARS data frame
# (previously the KNN data frame was passed here by mistake) and maps to its
# columns instead of global vectors.
ggplot(mars_pred_actual, aes(x = testData.y, y = marsPred)) +
  geom_bin2d(binwidth = c(0.25, 0.25)) +
  scale_fill_gradientn(
    limits = c(0, 30),
    breaks = seq(0, 40, by = 10),
    colours = rainbow(10)
  )
Answer: The MARS model performs best, with the highest R^2. Its variable importance ranking is X1, X4, X2, X5, X3, while X6 through X10 are unused; hence the model assigned no importance to the predictors that were not relevant.
library(caret)
library(RANN)
library(AppliedPredictiveModeling)
data(ChemicalManufacturingProcess)
set.seed(200)
# Column index of the response, used to separate the predictors from Yield.
colY <- which(colnames(ChemicalManufacturingProcess) == "Yield")
predictors <- ChemicalManufacturingProcess[, -colY]
# Split first (p = 0.8 of the rows go to training) so the held-out rows cannot
# influence any preprocessing.
# NOTE(review): the partition is now drawn directly after set.seed(); it
# should match the previous one provided the earlier preProcess()/predict()
# calls did not consume random numbers - confirm.
trainRow <- createDataPartition(ChemicalManufacturingProcess$Yield,
                                p = 0.8, list = FALSE)
# Fit the k-nearest-neighbour imputation on the training predictors only.
# Previously it was fitted on every row and included the Yield column, which
# leaked test-set and outcome information into the imputed predictors.
prepro <- preProcess(predictors[trainRow, ], method = c("knnImpute"))
# Apply the training-based imputation to all rows, then split.
newdata <- predict(prepro, predictors)
X.train <- newdata[trainRow, ]
y.train <- ChemicalManufacturingProcess$Yield[trainRow]
X.test <- newdata[-trainRow, ]
y.test <- ChemicalManufacturingProcess$Yield[-trainRow]
# NOTE(review): the model outputs recorded further down in this file were
# produced with the earlier whole-data imputation; re-run to refresh them.
KNN:
# Reset the seed before training; the same seed is reused before the MARS and
# SVM fits below.
set.seed(200)
# Tune k-nearest neighbours on the chemical manufacturing training set with
# centered and scaled predictors and 10 candidate values of k.
knnModel <- train(
  x = X.train,
  y = y.train,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
# Show the resampling results for each candidate k and the selected value.
print(knnModel)
## k-Nearest Neighbors
##
## 144 samples
## 57 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 144, 144, 144, 144, 144, 144, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 1.365780 0.4528739 1.069655
## 7 1.378378 0.4413042 1.080146
## 9 1.375417 0.4373255 1.080401
## 11 1.380799 0.4355753 1.086821
## 13 1.386536 0.4292945 1.093778
## 15 1.392702 0.4240102 1.103727
## 17 1.400824 0.4188405 1.113934
## 19 1.403301 0.4181174 1.117597
## 21 1.410616 0.4141855 1.127149
## 23 1.421834 0.4068892 1.138288
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 5.
Neural network:
# Single-point tuning grid for the model-averaged neural network:
# decay = 0.01, size = 1, bag = FALSE. Other candidates (decay 0, 0.1, 0.5,
# 0.9; size 10, 15, 20) were left out of the grid.
nnetGrid <- expand.grid(
  .decay = 0.01,
  .size = 1,
  .bag = FALSE
)
# Use the same seed as the KNN, MARS and SVM fits (was set.seed(1)). The four
# models are later compared with resamples(), which is only meaningful when
# they were evaluated on the same resamples.
# NOTE(review): the neural-network output recorded below was produced with
# seed 1; re-run to refresh it.
set.seed(200)
# Fit the averaged network on centered and scaled predictors. trace, linout
# and maxit are not train() arguments; they are passed on to the model fit.
nnetModel <- train(
  x = X.train,
  y = y.train,
  method = "avNNet",
  preProcess = c("center", "scale"),
  tuneGrid = nnetGrid,
  trace = FALSE,
  linout = TRUE,
  maxit = 500
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
# Show the resampling results for the single neural-network grid point.
print(nnetModel)
## Model Averaged Neural Network
##
## 144 samples
## 57 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 144, 144, 144, 144, 144, 144, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 1.767153 0.2912356 1.400779
##
## Tuning parameter 'size' was held constant at a value of 1
## Tuning
## parameter 'decay' was held constant at a value of 0.01
## Tuning parameter
## 'bag' was held constant at a value of FALSE
MARS
# Candidate MARS settings: interaction degree 1 or 2 crossed with 2 to 10
# retained terms.
marsGrid <- expand.grid(
  .degree = 1:2,
  .nprune = 2:10
)
# Same seed as the KNN and SVM fits.
set.seed(200)
# Tune MARS (earth) over that grid on centered and scaled predictors.
marsModel <- train(
  x = X.train,
  y = y.train,
  method = "earth",
  preProcess = c("center", "scale"),
  tuneGrid = marsGrid
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
# Show the resampling results for every degree/nprune combination.
print(marsModel)
## Multivariate Adaptive Regression Spline
##
## 144 samples
## 57 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 144, 144, 144, 144, 144, 144, ...
## Resampling results across tuning parameters:
##
## degree nprune RMSE Rsquared MAE
## 1 2 1.411268 0.4098750 1.107151
## 1 3 1.307141 0.4905180 1.041817
## 1 4 1.293540 0.5052587 1.029050
## 1 5 1.347941 0.4793801 1.073904
## 1 6 1.340144 0.4906001 1.065237
## 1 7 1.353643 0.4856596 1.077448
## 1 8 1.375574 0.4804173 1.098254
## 1 9 1.416728 0.4559809 1.115563
## 1 10 1.436290 0.4458512 1.124370
## 2 2 1.411268 0.4098750 1.107151
## 2 3 1.350940 0.4625688 1.062403
## 2 4 1.319006 0.4921283 1.044610
## 2 5 1.382786 0.4831476 1.079984
## 2 6 1.475093 0.4640268 1.096458
## 2 7 1.449447 0.4568105 1.095087
## 2 8 1.795503 0.4321311 1.165932
## 2 9 1.764130 0.4390187 1.158521
## 2 10 1.808197 0.4213890 1.173868
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 4 and degree = 1.
SVM
# Same seed as the KNN and MARS fits.
set.seed(200)
# Tune a radial-basis-function SVM on the chemical manufacturing training set
# with centered and scaled predictors and 10 candidate tuning values.
svm <- train(
  x = X.train,
  y = y.train,
  method = "svmRadial",
  preProcess = c("center", "scale"),
  tuneLength = 10
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: BiologicalMaterial07
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Show the resampling results for each candidate cost value C.
print(svm)
## Support Vector Machines with Radial Basis Function Kernel
##
## 144 samples
## 57 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 144, 144, 144, 144, 144, 144, ...
## Resampling results across tuning parameters:
##
## C RMSE Rsquared MAE
## 0.25 1.398759 0.4537202 1.1255148
## 0.50 1.302496 0.5047557 1.0358766
## 1.00 1.235115 0.5467465 0.9743106
## 2.00 1.186407 0.5784783 0.9385012
## 4.00 1.164512 0.5926128 0.9296788
## 8.00 1.157286 0.5969286 0.9258664
## 16.00 1.155801 0.5969221 0.9263493
## 32.00 1.155801 0.5969221 0.9263493
## 64.00 1.155801 0.5969221 0.9263493
## 128.00 1.155801 0.5969221 0.9263493
##
## Tuning parameter 'sigma' was held constant at a value of 0.01358238
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.01358238 and C = 16.
# Gather the resampling results of the four tuned models under short labels
# and summarise MAE, RMSE and R-squared for each of them.
model_list <- resamples(
  list(
    KNN = knnModel,
    NNet = nnetModel,
    MARS = marsModel,
    SVM = svm
  )
)
summary(model_list)
##
## Call:
## summary.resamples(object = model_list)
##
## Models: KNN, NNet, MARS, SVM
## Number of resamples: 25
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## KNN 0.8734492 0.9571554 1.0865124 1.0696545 1.1243159 1.352277 0
## NNet 0.9626489 1.2795109 1.3712333 1.4007793 1.5123413 1.918635 0
## MARS 0.8156142 0.9428349 1.0231779 1.0290501 1.1064248 1.285856 0
## SVM 0.7583403 0.8596967 0.9117069 0.9263493 0.9853586 1.093117 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## KNN 1.1367187 1.296736 1.347619 1.365780 1.448562 1.684804 0
## NNet 1.2162841 1.641295 1.773364 1.767153 1.877997 2.365619 0
## MARS 1.0404386 1.185886 1.281264 1.293540 1.398731 1.633982 0
## SVM 0.9692539 1.069662 1.135511 1.155801 1.214655 1.358024 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## KNN 0.3054950 0.4084353 0.4456296 0.4528739 0.5260487 0.5634062 0
## NNet 0.1053802 0.2118994 0.2853491 0.2912356 0.3541902 0.6115916 0
## MARS 0.2658525 0.4538329 0.4983337 0.5052587 0.5865740 0.6711294 0
## SVM 0.4116634 0.5367938 0.5850332 0.5969221 0.6594510 0.7138705 0
Answer: Comparing the mean resampled R^2 across models, SVM has the highest value (0.597); it also has the lowest mean RMSE (1.156) and the lowest mean MAE (0.926).
# Variable importance for the selected SVM model. The printed header reports
# it as a "loess r-squared" importance.
Features <- varImp(svm)
print(Features)
## loess r-squared variable importance
##
## only 20 most important variables shown (out of 57)
##
## Overall
## ManufacturingProcess32 100.00
## BiologicalMaterial06 87.81
## ManufacturingProcess13 78.23
## BiologicalMaterial03 76.45
## BiologicalMaterial12 69.16
## ManufacturingProcess17 68.44
## ManufacturingProcess36 67.83
## ManufacturingProcess31 67.82
## ManufacturingProcess09 64.12
## ManufacturingProcess06 60.80
## ManufacturingProcess29 53.56
## BiologicalMaterial02 53.50
## ManufacturingProcess11 50.24
## BiologicalMaterial11 48.66
## ManufacturingProcess33 46.88
## ManufacturingProcess30 45.42
## BiologicalMaterial09 38.34
## BiologicalMaterial04 37.89
## BiologicalMaterial08 36.89
## ManufacturingProcess12 36.65