\[7.2\]
library(ISLR)
library(ggplot2)
library(caret)
library(mlbench)
set.seed(200)
trainingData <- mlbench.friedman1(200, sd = 1)
## We convert the 'x' data from a matrix to a data frame
## One reason is that this will give the columns names.
trainingData$x <- data.frame(trainingData$x)
## Look at the data using
featurePlot(trainingData$x, trainingData$y)
## or other methods.
## This creates a list with a vector 'y' and a matrix
## of predictors 'x'. Also simulate a large test set to
## estimate the true error rate with good precision:
testData <- mlbench.friedman1(5000, sd = 1)
testData$x <- data.frame(testData$x)
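Beyond featurePlot, a quick way to look at the simulated data is to correlate each predictor with the response; a minimal sketch (in the Friedman 1 simulation only X1-X5 carry signal, X6-X10 are noise):
# Correlation of each predictor with y; only X1-X5 should stand out
round(sort(sapply(trainingData$x, cor, y = trainingData$y), decreasing = TRUE), 2)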
knnModel <- train(x = trainingData$x,
                  y = trainingData$y,
                  method = "knn",
                  preProc = c("center", "scale"),
                  tuneLength = 10)
knnModel
## k-Nearest Neighbors
##
## 200 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 3.466085 0.5121775 2.816838
## 7 3.349428 0.5452823 2.727410
## 9 3.264276 0.5785990 2.660026
## 11 3.214216 0.6024244 2.603767
## 13 3.196510 0.6176570 2.591935
## 15 3.184173 0.6305506 2.577482
## 17 3.183130 0.6425367 2.567787
## 19 3.198752 0.6483184 2.592683
## 21 3.188993 0.6611428 2.588787
## 23 3.200458 0.6638353 2.604529
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 17.
knnPred <- predict(knnModel, newdata = testData$x)
## The function 'postResample' can be used to get the test set
## performance values
postResample(pred = knnPred, obs = testData$y)
## RMSE Rsquared MAE
## 3.2040595 0.6819919 2.5683461
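The k profile can also be inspected graphically; caret's plot method for a train object draws the resampled RMSE against each candidate k:
# Resampled RMSE across the candidate values of k
plot(knnModel)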
## Neural Network Model:
findCorrelation(cor(trainingData$x), cutoff = 0.7) # no highly correlated variables
## integer(0)
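Since findCorrelation returns integer(0), no predictors need to be dropped before fitting the network. Had it flagged any columns, they could be removed first; a hedged sketch of that step:
# Drop collinear predictors only if the filter flags any (none here)
highCorr <- findCorrelation(cor(trainingData$x), cutoff = 0.7)
if (length(highCorr) > 0) trainingData$x <- trainingData$x[, -highCorr]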
nnetGrid <- expand.grid(.decay = c(0, 0.01, .1),
                        .size = c(1:10))
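MaxNWts in the call below caps the weight count for the largest candidate network; for a single-hidden-layer model with p inputs and H hidden units the count is H*(p + 1) + H + 1. A quick sanity check using the grid above (p = 10, H up to 10):
# Largest network in the grid: 10 hidden units on 10 predictors
p <- ncol(trainingData$x)
H <- max(nnetGrid$.size)
H * (p + 1) + H + 1  # 121, matching MaxNWts below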
set.seed(123)
nnetTune <- train(trainingData$x,
                  trainingData$y,
                  method = "nnet",
                  tuneGrid = nnetGrid,
                  trControl = trainControl(method = "cv"),
                  preProc = c("center", "scale"),
                  linout = TRUE,
                  trace = FALSE,
                  MaxNWts = 10 * (ncol(trainingData$x) + 1) + 10 + 1,
                  maxit = 500)
nnetTune
## Neural Network
##
## 200 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ...
## Resampling results across tuning parameters:
##
## decay size RMSE Rsquared MAE
## 0.00 1 2.428469 0.7653147 1.881622
## 0.00 2 2.658319 0.7176523 2.123341
## 0.00 3 2.439662 0.7576737 1.909810
## 0.00 4 2.395450 0.7718850 1.870523
## 0.00 5 3.158842 0.6600032 2.306494
## 0.00 6 5.114760 0.5927134 2.878301
## 0.00 7 3.998873 0.5863680 2.807368
## 0.00 8 13.038224 0.3380882 6.216311
## 0.00 9 3.418244 0.6064786 2.750387
## 0.00 10 14.282253 0.3903329 5.817233
## 0.01 1 2.428178 0.7651061 1.880566
## 0.01 2 2.628711 0.7352039 2.081463
## 0.01 3 2.500025 0.7465002 1.952391
## 0.01 4 2.487535 0.7612713 1.893652
## 0.01 5 2.627321 0.7308586 2.175922
## 0.01 6 2.779170 0.7162527 2.154943
## 0.01 7 2.974175 0.6754817 2.306175
## 0.01 8 3.029658 0.6786905 2.368330
## 0.01 9 3.530934 0.6095407 2.864381
## 0.01 10 3.427628 0.6325805 2.755656
## 0.10 1 2.441856 0.7622291 1.892003
## 0.10 2 2.571867 0.7378116 1.967507
## 0.10 3 2.209160 0.8028016 1.797465
## 0.10 4 2.384313 0.7706745 1.946165
## 0.10 5 2.703109 0.7134765 2.109896
## 0.10 6 2.691025 0.7343891 2.178368
## 0.10 7 2.807927 0.7008760 2.202846
## 0.10 8 2.906717 0.7081779 2.361708
## 0.10 9 3.246571 0.6386602 2.586619
## 0.10 10 3.239180 0.6467128 2.555378
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 3 and decay = 0.1.
nnetPred <- predict(nnetTune, testData$x)
postResample(nnetPred, testData$y)
## RMSE Rsquared MAE
## 2.4763089 0.7565333 1.8564599
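The winning architecture can also be read back from the train object rather than the printed table; train stores it in bestTune:
# Chosen tuning parameters (size = 3, decay = 0.1 per the table above)
nnetTune$bestTune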
## MARS Model:
marsGrid <- expand.grid(.degree = 1:2, .nprune = 2:38)
set.seed(123)
marsTuned <- train(trainingData$x,
                   trainingData$y,
                   method = "earth",
                   tuneGrid = marsGrid,
                   trControl = trainControl(method = "cv"))
marsTuned
## Multivariate Adaptive Regression Spline
##
## 200 samples
## 10 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ...
## Resampling results across tuning parameters:
##
## degree nprune RMSE Rsquared MAE
## 1 2 4.311247 0.2748122 3.603533
## 1 3 3.531005 0.5107259 2.857560
## 1 4 2.609132 0.7291471 2.109945
## 1 5 2.234494 0.8007350 1.788244
## 1 6 2.279819 0.7999357 1.803273
## 1 7 1.792708 0.8748522 1.398541
## 1 8 1.710582 0.8857656 1.323419
## 1 9 1.662155 0.8892531 1.291466
## 1 10 1.706154 0.8823897 1.306578
## 1 11 1.743116 0.8739494 1.360521
## 1 12 1.740790 0.8734421 1.357507
## 1 13 1.703492 0.8788657 1.326192
## 1 14 1.700604 0.8791430 1.324716
## 1 15 1.692444 0.8801290 1.317804
## 1 16 1.692444 0.8801290 1.317804
## 1 17 1.692444 0.8801290 1.317804
## 1 18 1.692444 0.8801290 1.317804
## 1 19 1.692444 0.8801290 1.317804
## 1 20 1.692444 0.8801290 1.317804
## 1 21 1.692444 0.8801290 1.317804
## 1 22 1.692444 0.8801290 1.317804
## 1 23 1.692444 0.8801290 1.317804
## 1 24 1.692444 0.8801290 1.317804
## 1 25 1.692444 0.8801290 1.317804
## 1 26 1.692444 0.8801290 1.317804
## 1 27 1.692444 0.8801290 1.317804
## 1 28 1.692444 0.8801290 1.317804
## 1 29 1.692444 0.8801290 1.317804
## 1 30 1.692444 0.8801290 1.317804
## 1 31 1.692444 0.8801290 1.317804
## 1 32 1.692444 0.8801290 1.317804
## 1 33 1.692444 0.8801290 1.317804
## 1 34 1.692444 0.8801290 1.317804
## 1 35 1.692444 0.8801290 1.317804
## 1 36 1.692444 0.8801290 1.317804
## 1 37 1.692444 0.8801290 1.317804
## 1 38 1.692444 0.8801290 1.317804
## 2 2 4.311247 0.2748122 3.603533
## 2 3 3.531005 0.5107259 2.857560
## 2 4 2.609132 0.7291471 2.109945
## 2 5 2.243508 0.7985944 1.788189
## 2 6 2.236723 0.7987764 1.770156
## 2 7 1.815177 0.8693557 1.425563
## 2 8 1.699050 0.8834064 1.317662
## 2 9 1.487692 0.9084049 1.182061
## 2 10 1.469496 0.9053535 1.160443
## 2 11 1.392318 0.9178210 1.085187
## 2 12 1.302695 0.9312685 1.032827
## 2 13 1.293800 0.9331208 1.033397
## 2 14 1.265082 0.9371588 1.012795
## 2 15 1.275804 0.9351561 1.019457
## 2 16 1.288843 0.9335588 1.031360
## 2 17 1.296439 0.9327583 1.035093
## 2 18 1.296439 0.9327583 1.035093
## 2 19 1.296439 0.9327583 1.035093
## 2 20 1.296439 0.9327583 1.035093
## 2 21 1.296439 0.9327583 1.035093
## 2 22 1.296439 0.9327583 1.035093
## 2 23 1.296439 0.9327583 1.035093
## 2 24 1.296439 0.9327583 1.035093
## 2 25 1.296439 0.9327583 1.035093
## 2 26 1.296439 0.9327583 1.035093
## 2 27 1.296439 0.9327583 1.035093
## 2 28 1.296439 0.9327583 1.035093
## 2 29 1.296439 0.9327583 1.035093
## 2 30 1.296439 0.9327583 1.035093
## 2 31 1.296439 0.9327583 1.035093
## 2 32 1.296439 0.9327583 1.035093
## 2 33 1.296439 0.9327583 1.035093
## 2 34 1.296439 0.9327583 1.035093
## 2 35 1.296439 0.9327583 1.035093
## 2 36 1.296439 0.9327583 1.035093
## 2 37 1.296439 0.9327583 1.035093
## 2 38 1.296439 0.9327583 1.035093
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 14 and degree = 2.
marsPred <- predict(marsTuned, testData$x)
postResample(marsPred, testData$y)
## RMSE Rsquared MAE
## 1.1722635 0.9448890 0.9324923
## SVM Model:
svmRTuned <- train(trainingData$x,
                   trainingData$y,
                   method = "svmRadial",
                   preProc = c("center", "scale"),
                   tuneLength = 14,
                   trControl = trainControl(method = "cv"))
svmRTuned
## Support Vector Machines with Radial Basis Function Kernel
##
## 200 samples
## 10 predictor
##
## Pre-processing: centered (10), scaled (10)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ...
## Resampling results across tuning parameters:
##
## C RMSE Rsquared MAE
## 0.25 2.483272 0.8002484 1.987944
## 0.50 2.222253 0.8176803 1.773054
## 1.00 2.053650 0.8394871 1.613834
## 2.00 1.906672 0.8593733 1.498131
## 4.00 1.820527 0.8683577 1.405055
## 8.00 1.795894 0.8714776 1.414387
## 16.00 1.799680 0.8713663 1.426459
## 32.00 1.800000 0.8713301 1.426565
## 64.00 1.800000 0.8713301 1.426565
## 128.00 1.800000 0.8713301 1.426565
## 256.00 1.800000 0.8713301 1.426565
## 512.00 1.800000 0.8713301 1.426565
## 1024.00 1.800000 0.8713301 1.426565
## 2048.00 1.800000 0.8713301 1.426565
##
## Tuning parameter 'sigma' was held constant at a value of 0.05865089
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.05865089 and C = 8.
svmPred <- predict(svmRTuned, testData$x)
postResample(svmPred, testData$y)
## RMSE Rsquared MAE
## 2.0367520 0.8318292 1.5442180
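Only C is tuned here: for method = "svmRadial", caret estimates sigma once from the training set (via kernlab's sigest), which is why the output reports that sigma was held constant. The cost profile, which flattens out above C = 8, can be plotted on a log2 axis:
# Resampled RMSE versus cost, with a log2 x-axis
plot(svmRTuned, scales = list(x = list(log = 2)))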
rbind(knnMod = postResample(knnPred, testData$y),
      nnetMod = postResample(nnetPred, testData$y),
      marsMod = postResample(marsPred, testData$y),
      svmMod = postResample(svmPred, testData$y))
## RMSE Rsquared MAE
## knnMod 3.204059 0.6819919 2.5683461
## nnetMod 2.476309 0.7565333 1.8564599
## marsMod 1.172263 0.9448890 0.9324923
## svmMod 2.036752 0.8318292 1.5442180
varImp(marsTuned)
## earth variable importance
##
## Overall
## X1 100.00
## X4 75.24
## X2 48.74
## X5 15.53
## X3 0.00
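So MARS gives the best test-set performance of the four models (lowest RMSE, highest R-squared), and its importance ranking is confined to the informative Friedman predictors: X1, X4, X2, and X5, with X3 last at 0; the noise variables X6-X10 do not appear. The selected hinge functions can be inspected directly on the final earth fit:
# Basis functions and coefficients of the final MARS model
summary(marsTuned$finalModel)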
\[7.5\]
library(AppliedPredictiveModeling)
library(caret)
library(tidyverse)
library(corrplot)
library(Amelia)
# load data
data(ChemicalManufacturingProcess)
# data splitting and processing from homework 7
imputations <- preProcess(ChemicalManufacturingProcess,
                          method = c("knnImpute"),
                          k = 5)
chem_man_imputed <- predict(imputations, ChemicalManufacturingProcess)
chem_man_filtered <- chem_man_imputed[,-nearZeroVar(chem_man_imputed)]
set.seed(123)
# split into training and testing
train_indices <- sample(nrow(chem_man_filtered), nrow(chem_man_filtered) * 0.8, replace = FALSE)
trainChem <- chem_man_filtered[train_indices,]
testChem <- chem_man_filtered[-train_indices,]
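Before fitting anything it is worth confirming that the kNN imputation left no missing values and that the split sizes are as expected; a quick check:
# No NAs should remain after imputation; roughly an 80/20 train/test split
sum(is.na(chem_man_filtered))
c(train = nrow(trainChem), test = nrow(testChem))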
## a.
# KNN:
knnModel <- train(Yield ~ .,
                  data = trainChem,
                  method = "knn",
                  preProc = c("center", "scale"),
                  tuneLength = 10)
knnModel
## k-Nearest Neighbors
##
## 140 samples
## 56 predictor
##
## Pre-processing: centered (56), scaled (56)
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 140, 140, 140, 140, 140, 140, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 5 0.8051317 0.3751255 0.6326099
## 7 0.7915491 0.3944335 0.6275697
## 9 0.7880395 0.4009045 0.6242525
## 11 0.7944818 0.3876910 0.6294825
## 13 0.7924476 0.3959429 0.6270117
## 15 0.7989080 0.3895254 0.6323954
## 17 0.8069230 0.3801722 0.6391329
## 19 0.8107104 0.3832165 0.6418239
## 21 0.8166273 0.3804731 0.6487960
## 23 0.8179001 0.3835214 0.6491087
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 9.
knnPred <- predict(knnModel, testChem)
# Neural Network Model
trainChem_x <- trainChem |> dplyr::select(-Yield)
trainChem_y <- trainChem |> dplyr::select(Yield)
testChem_x <- testChem |> dplyr::select(-Yield)
testChem_y <- testChem |> dplyr::select(Yield)
corr_indices <- findCorrelation(cor(trainChem_x), cutoff = 0.7)
trainChemFiltered <- trainChem_x[, -corr_indices]
testChemFiltered <- testChem_x[, -corr_indices]
trainChemFiltered$Yield <- trainChem_y$Yield
testChemFiltered$Yield <- testChem_y$Yield # reattach the response after dropping the highly correlated predictors
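Unlike the simulated data in 7.2, the chemical process predictors are strongly collinear, so the 0.7 cutoff removes a sizable block of columns (the nnet output below shows 34 predictors remaining of the original 56). A quick check of how many were dropped:
# Number of predictors removed by the 0.7 correlation filter
length(corr_indices)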
nnetGrid <- expand.grid(.decay = c(0, 0.01, .1),
                        .size = c(1:10))
set.seed(613)
nnetTune <- train(Yield ~ .,
                  data = trainChemFiltered,
                  method = "nnet",
                  tuneGrid = nnetGrid,
                  trControl = trainControl(method = "cv"),
                  preProc = c("center", "scale"),
                  linout = TRUE,
                  trace = FALSE,
                  MaxNWts = 10 * (ncol(trainChemFiltered)) + 10 + 1,
                  maxit = 500)
nnetTune
## Neural Network
##
## 140 samples
## 34 predictor
##
## Pre-processing: centered (34), scaled (34)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 126, 127, 126, 124, 126, 126, ...
## Resampling results across tuning parameters:
##
## decay size RMSE Rsquared MAE
## 0.00 1 0.8860080 0.3276148 0.6908584
## 0.00 2 0.9284650 0.3004782 0.7192417
## 0.00 3 1.1724749 0.3008227 0.9426286
## 0.00 4 1.1434151 0.2525458 0.9164239
## 0.00 5 1.1022941 0.2552948 0.8771900
## 0.00 6 1.3843251 0.1782694 1.0832505
## 0.00 7 1.3482599 0.1682292 1.0402059
## 0.00 8 1.1045291 0.2407480 0.8767745
## 0.00 9 1.0053394 0.3015537 0.7864717
## 0.00 10 0.9630697 0.3471828 0.8022991
## 0.01 1 0.9399990 0.3016182 0.7591808
## 0.01 2 0.8752422 0.4118767 0.6787028
## 0.01 3 1.2722424 0.2416129 1.0132543
## 0.01 4 1.1520935 0.2662412 0.9329635
## 0.01 5 1.1449964 0.2719272 0.9525353
## 0.01 6 0.9270159 0.4164483 0.7405059
## 0.01 7 0.8722660 0.3954060 0.7248977
## 0.01 8 0.9235080 0.3203924 0.7340114
## 0.01 9 0.8737608 0.3763744 0.7044871
## 0.01 10 0.8120468 0.4439574 0.6569336
## 0.10 1 0.7972195 0.4000895 0.6467419
## 0.10 2 0.9275708 0.4067340 0.7693819
## 0.10 3 1.0285978 0.3172875 0.8188056
## 0.10 4 0.9351223 0.3453424 0.7437457
## 0.10 5 0.9246938 0.3467975 0.7554783
## 0.10 6 0.8913509 0.3563416 0.7393845
## 0.10 7 0.8117307 0.4388129 0.6655987
## 0.10 8 0.8766561 0.3540777 0.7129637
## 0.10 9 0.8334868 0.4145568 0.6885491
## 0.10 10 0.8019996 0.4253869 0.6555231
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 1 and decay = 0.1.
nnetPred <- predict(nnetTune, testChemFiltered)
# MARS Model:
marsGrid <- expand.grid(.degree = 1:2, .nprune = 2:38)
set.seed(613)
marsTuned <- train(Yield ~ .,
                   data = trainChem,
                   method = "earth",
                   tuneGrid = marsGrid,
                   trControl = trainControl(method = "cv"))
marsTuned
## Multivariate Adaptive Regression Spline
##
## 140 samples
## 56 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 126, 127, 126, 124, 126, 126, ...
## Resampling results across tuning parameters:
##
## degree nprune RMSE Rsquared MAE
## 1 2 0.7447063 0.4439679 0.5915238
## 1 3 0.6376569 0.5767418 0.5178721
## 1 4 0.7048392 0.5182127 0.5385218
## 1 5 0.7422687 0.4712073 0.5580131
## 1 6 0.7868659 0.4560331 0.5872088
## 1 7 0.7982740 0.4581982 0.5971732
## 1 8 0.8310610 0.4165120 0.6211374
## 1 9 0.8083470 0.4278275 0.6082458
## 1 10 0.8070489 0.4412733 0.6070551
## 1 11 0.8098200 0.4334555 0.6150378
## 1 12 0.8270384 0.4550866 0.6138953
## 1 13 0.8105597 0.4500556 0.6135100
## 1 14 0.8288852 0.4335370 0.6314773
## 1 15 0.7878973 0.4512077 0.6057949
## 1 16 0.7797134 0.4605289 0.5975943
## 1 17 0.7815254 0.4537066 0.5965864
## 1 18 0.7799054 0.4555640 0.5944069
## 1 19 0.7799054 0.4555640 0.5944069
## 1 20 0.7799054 0.4555640 0.5944069
## 1 21 0.7799054 0.4555640 0.5944069
## 1 22 0.7799054 0.4555640 0.5944069
## 1 23 0.7799054 0.4555640 0.5944069
## 1 24 0.7799054 0.4555640 0.5944069
## 1 25 0.7799054 0.4555640 0.5944069
## 1 26 0.7799054 0.4555640 0.5944069
## 1 27 0.7799054 0.4555640 0.5944069
## 1 28 0.7799054 0.4555640 0.5944069
## 1 29 0.7799054 0.4555640 0.5944069
## 1 30 0.7799054 0.4555640 0.5944069
## 1 31 0.7799054 0.4555640 0.5944069
## 1 32 0.7799054 0.4555640 0.5944069
## 1 33 0.7799054 0.4555640 0.5944069
## 1 34 0.7799054 0.4555640 0.5944069
## 1 35 0.7799054 0.4555640 0.5944069
## 1 36 0.7799054 0.4555640 0.5944069
## 1 37 0.7799054 0.4555640 0.5944069
## 1 38 0.7799054 0.4555640 0.5944069
## 2 2 0.7447063 0.4439679 0.5915238
## 2 3 0.6684686 0.5256099 0.5320134
## 2 4 0.6435412 0.5555535 0.5149291
## 2 5 0.6204790 0.6019286 0.4901222
## 2 6 0.6192558 0.5960994 0.4989223
## 2 7 0.6118131 0.6132054 0.4882839
## 2 8 0.6114004 0.6195031 0.4773827
## 2 9 0.6418331 0.5782684 0.5082956
## 2 10 0.6315870 0.5965272 0.5064992
## 2 11 0.6335806 0.6003002 0.5070047
## 2 12 0.6655862 0.5924883 0.5408615
## 2 13 0.6662762 0.5955636 0.5419373
## 2 14 0.6742523 0.5964206 0.5443044
## 2 15 0.6898043 0.5875630 0.5479353
## 2 16 0.6934838 0.5905822 0.5546733
## 2 17 0.6912745 0.6012155 0.5510503
## 2 18 0.6955843 0.6013427 0.5541434
## 2 19 0.6974083 0.6048551 0.5499242
## 2 20 0.6891369 0.6099843 0.5475698
## 2 21 0.6896803 0.6161657 0.5567493
## 2 22 0.7104724 0.5999726 0.5750748
## 2 23 0.7051356 0.6014778 0.5707753
## 2 24 0.7138143 0.5932797 0.5803284
## 2 25 0.7138143 0.5932797 0.5803284
## 2 26 0.7138143 0.5932797 0.5803284
## 2 27 0.7138143 0.5932797 0.5803284
## 2 28 0.7138143 0.5932797 0.5803284
## 2 29 0.7138143 0.5932797 0.5803284
## 2 30 0.7138143 0.5932797 0.5803284
## 2 31 0.7138143 0.5932797 0.5803284
## 2 32 0.7138143 0.5932797 0.5803284
## 2 33 0.7138143 0.5932797 0.5803284
## 2 34 0.7138143 0.5932797 0.5803284
## 2 35 0.7138143 0.5932797 0.5803284
## 2 36 0.7138143 0.5932797 0.5803284
## 2 37 0.7138143 0.5932797 0.5803284
## 2 38 0.7138143 0.5932797 0.5803284
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 8 and degree = 2.
marsPred <- predict(marsTuned, testChem)
# SVM Model:
svmRTuned <- train(Yield ~ .,
                   data = trainChem,
                   method = "svmRadial",
                   preProc = c("center", "scale"),
                   tuneLength = 14,
                   trControl = trainControl(method = "cv"))
svmRTuned
## Support Vector Machines with Radial Basis Function Kernel
##
## 140 samples
## 56 predictor
##
## Pre-processing: centered (56), scaled (56)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 126, 124, 128, 126, 127, 125, ...
## Resampling results across tuning parameters:
##
## C RMSE Rsquared MAE
## 0.25 0.7683277 0.4751059 0.6232367
## 0.50 0.7055179 0.5313663 0.5727722
## 1.00 0.6588907 0.5844897 0.5343755
## 2.00 0.6232091 0.6277378 0.5064778
## 4.00 0.6223625 0.6223060 0.5064420
## 8.00 0.6183000 0.6168089 0.5089816
## 16.00 0.6125701 0.6235705 0.5034715
## 32.00 0.6125701 0.6235705 0.5034715
## 64.00 0.6125701 0.6235705 0.5034715
## 128.00 0.6125701 0.6235705 0.5034715
## 256.00 0.6125701 0.6235705 0.5034715
## 512.00 0.6125701 0.6235705 0.5034715
## 1024.00 0.6125701 0.6235705 0.5034715
## 2048.00 0.6125701 0.6235705 0.5034715
##
## Tuning parameter 'sigma' was held constant at a value of 0.01220051
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.01220051 and C = 16.
svmPred <- predict(svmRTuned, testChem)
rbind(knnMod = postResample(knnPred, testChem$Yield),
      nnetMod = postResample(nnetPred, testChemFiltered$Yield),
      marsMod = postResample(marsPred, testChem$Yield),
      svmMod = postResample(svmPred, testChem$Yield))
## RMSE Rsquared MAE
## knnMod 0.8164187 0.5549754 0.6749210
## nnetMod 0.7513991 0.5490702 0.5680688
## marsMod 0.7768123 0.5854577 0.5701455
## svmMod 0.6050492 0.7761230 0.4482863
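On the held-out test set the radial SVM is clearly the best of the four nonlinear models (RMSE 0.605, R-squared 0.776), even though its cross-validated RMSE (0.613) is essentially tied with MARS (0.611). A quick visual check of the SVM fit, plotting observed against predicted Yield on the test set:
# Observed vs. predicted Yield for the SVM on the test set
plot(testChem$Yield, svmPred, xlab = "Observed Yield", ylab = "Predicted Yield")
abline(0, 1, lty = 2)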
## b.
plot(varImp(svmRTuned), 10) # top 10 most important predictors for the best model (SVM)
lasso_mod <- train(Yield ~ .,
                   data = trainChem,
                   method = "glmnet",
                   preProcess = c("center", "scale"),
                   trControl = trainControl(method = "cv"),
                   tuneGrid = expand.grid(.alpha = 1, .lambda = seq(0, 1, 0.05)))
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
plot(varImp(lasso_mod), 10)
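To compare the nonlinear and linear rankings directly rather than reading the two plots, the top-ten names from each varImp object can be intersected; a small sketch (topN is just an illustrative helper, not a caret function):
# Hypothetical helper: names of the n highest-importance predictors
topN <- function(mod, n = 10) {
  imp <- varImp(mod)$importance
  rownames(imp)[order(imp$Overall, decreasing = TRUE)][seq_len(n)]
}
intersect(topN(svmRTuned), topN(lasso_mod)) # predictors both models rank highly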
## c.
chem_man_filtered[, c("Yield", "BiologicalMaterial06", "ManufacturingProcess31",
                      "BiologicalMaterial03", "BiologicalMaterial12")] |>
  cor() |>
  corrplot(method = "shade",
           diag = FALSE,
           type = "full",
           addCoef.col = "blue",
           number.cex = 0.5)
## BiologicalMaterial06 has the highest positive correlation with Yield,
## followed by BiologicalMaterial03 and BiologicalMaterial12.
## ManufacturingProcess31 has only a slight negative correlation with Yield.