library(dplyr)
library(varImp)
library(elasticnet)
library(AppliedPredictiveModeling)
# Load the permeability data set from AppliedPredictiveModeling and inspect
# the structure of the response.
data(permeability)
str(permeability)
## num [1:165, 1] 12.52 1.12 19.41 1.73 1.68 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:165] "1" "2" "3" "4" ...
## ..$ : chr "permeability"
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'caret'
## The following object is masked from 'package:varImp':
##
## varImp
## The following objects are masked from 'package:measures':
##
## MAE, RMSE
dim(fingerprints)
## [1] 165 1107
# Remove near-zero-variance fingerprint columns.
# Negative indexing with an empty index vector selects *no* columns, so the
# case where nearZeroVar() flags nothing must be handled explicitly;
# drop = FALSE keeps `fp` a matrix even if only one column survives.
fp <- local({
  nzv_cols <- nearZeroVar(fingerprints)
  if (length(nzv_cols) > 0) {
    fingerprints[, -nzv_cols, drop = FALSE]
  } else {
    fingerprints
  }
})
dim(fp)
## [1] 165 388
# Stratified 80/20 split of the permeability data; the seed fixes the
# partition so the split is reproducible.
set.seed(17)
trainingRows <- createDataPartition(permeability, p = 0.80, list = FALSE)

# Predictors
x_train <- fp[trainingRows, ]
x_test <- fp[-trainingRows, ]

# Response
y_train <- permeability[trainingRows]
y_test <- permeability[-trainingRows]
# Partial least squares on centred/scaled predictors, tuned over 20 candidate
# values of ncomp with cross-validation; the winner is chosen by R-squared.
Pls_Fit <- train(
  x = x_train,
  y = y_train,
  method = "pls",
  preProcess = c("center", "scale"),
  tuneLength = 20,
  metric = "Rsquared",
  trControl = trainControl(method = "cv")
)

# Keep the resampling table for later inspection, then print the fit summary.
Pls_Result <- Pls_Fit$results
Pls_Fit
## Partial Least Squares
##
## 133 samples
## 388 predictors
##
## Pre-processing: centered (388), scaled (388)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 121, 118, 121, 118, 120, 119, ...
## Resampling results across tuning parameters:
##
## ncomp RMSE Rsquared MAE
## 1 12.90025 0.3222520 9.941320
## 2 11.68413 0.4581354 8.250378
## 3 11.97359 0.4600123 8.860956
## 4 12.24383 0.4492765 8.896348
## 5 12.01675 0.4693410 8.872978
## 6 12.14125 0.4684395 9.032265
## 7 12.02625 0.4712362 9.194178
## 8 12.13464 0.4669973 9.459727
## 9 12.06592 0.4712771 9.257314
## 10 12.36057 0.4565498 9.496713
## 11 12.81838 0.4400975 9.620152
## 12 13.04478 0.4310474 9.828846
## 13 13.14836 0.4311849 9.831693
## 14 13.58285 0.4095574 10.153620
## 15 14.00005 0.3966119 10.426516
## 16 14.38551 0.3946972 10.706439
## 17 14.64843 0.3835487 10.773578
## 18 14.94472 0.3876079 11.067579
## 19 15.30298 0.3759125 11.357803
## 20 15.58543 0.3663076 11.549326
##
## Rsquared was used to select the optimal model using the largest value.
## The final value used for the model was ncomp = 9.
# Resampling profile across the tuning grid.
plot(Pls_Fit)

# Hold-out performance of the selected PLS model.
plsPred <- predict(Pls_Fit, newdata = x_test)
postResample(pred = plsPred, obs = y_test)
## RMSE Rsquared MAE
## 11.455821 0.530198 8.381798
# Ridge regression tuned over lambda in {0, 0.1, ..., 1} with cross-validation
# on centred/scaled predictors; the model is selected by R-squared.
set.seed(17)
ridgeFit <- train(
  x = x_train,
  y = y_train,
  method = "ridge",
  preProcess = c("center", "scale"),
  tuneGrid = data.frame(.lambda = seq(0, 1, by = 0.1)),
  metric = "Rsquared",
  trControl = trainControl(method = "cv")
)
ridgeFit
## Ridge Regression
##
## 133 samples
## 388 predictors
##
## Pre-processing: centered (388), scaled (388)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 120, 119, 121, 120, 119, 119, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.0 17.69824 0.3387828 12.323778
## 0.1 13.16484 0.4586224 9.573189
## 0.2 12.90509 0.4751686 9.501242
## 0.3 13.03168 0.4820743 9.650479
## 0.4 13.30036 0.4863153 9.935894
## 0.5 13.67743 0.4894886 10.256849
## 0.6 14.14675 0.4912908 10.638401
## 0.7 14.66246 0.4929628 11.020308
## 0.8 15.21765 0.4943953 11.426074
## 0.9 15.81805 0.4954898 11.900490
## 1.0 16.44342 0.4964945 12.452772
##
## Rsquared was used to select the optimal model using the largest value.
## The final value used for the model was lambda = 1.
# Resampling profile of the ridge fit.
plot(ridgeFit)

# Lasso tuned over fraction in {0, 0.05, ..., 0.5} with cross-validation on
# centred/scaled predictors; the model is selected by R-squared.
set.seed(17)
lassoFit <- train(
  x = x_train,
  y = y_train,
  method = "lasso",
  preProcess = c("center", "scale"),
  tuneGrid = data.frame(.fraction = seq(0, 0.5, by = 0.05)),
  metric = "Rsquared",
  trControl = trainControl(method = "cv")
)
lassoFit
## The lasso
##
## 133 samples
## 388 predictors
##
## Pre-processing: centered (388), scaled (388)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 120, 119, 121, 120, 119, 119, ...
## Resampling results across tuning parameters:
##
## fraction RMSE Rsquared MAE
## 0.00 14.99248 NaN 11.899869
## 0.05 12.04587 0.5004433 8.961563
## 0.10 12.58317 0.4619831 8.828892
## 0.15 14.10187 0.4392581 9.776441
## 0.20 15.02759 0.4202044 10.174406
## 0.25 15.08830 0.4083345 10.103200
## 0.30 15.14589 0.4052072 10.229122
## 0.35 15.18624 0.4047464 10.293555
## 0.40 15.35506 0.3980124 10.405897
## 0.45 15.58076 0.3886588 10.627774
## 0.50 15.84203 0.3764416 10.839115
##
## Rsquared was used to select the optimal model using the largest value.
## The final value used for the model was fraction = 0.05.
# Resampling profile of the lasso fit.
plot(lassoFit)

# Elastic net tuned over the full fraction x lambda grid (11 x 11 candidates)
# with cross-validation on centred/scaled predictors; selected by R-squared.
set.seed(1)
enetFit <- train(
  x = x_train,
  y = y_train,
  method = "enet",
  preProcess = c("center", "scale"),
  tuneGrid = expand.grid(
    .fraction = seq(0, 1, by = 0.1),
    .lambda = seq(0, 1, by = 0.1)
  ),
  metric = "Rsquared",
  trControl = trainControl(method = "cv")
)
enetFit
## Elasticnet
##
## 133 samples
## 388 predictors
##
## Pre-processing: centered (388), scaled (388)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 120, 120, 120, 119, 120, 119, ...
## Resampling results across tuning parameters:
##
## lambda fraction RMSE Rsquared MAE
## 0.0 0.0 15.41970 NaN 12.091419
## 0.0 0.1 11.61723 0.4611141 8.257619
## 0.0 0.2 12.14064 0.4505336 8.783101
## 0.0 0.3 12.78913 0.4447343 9.268132
## 0.0 0.4 13.58610 0.4374408 9.827298
## 0.0 0.5 14.48706 0.4210673 10.382041
## 0.0 0.6 15.26000 0.4102524 10.671864
## 0.0 0.7 15.93989 0.4048366 10.913214
## 0.0 0.8 16.69682 0.3992267 11.321393
## 0.0 0.9 17.48924 0.3871120 11.763618
## 0.0 1.0 18.58494 0.3778306 12.330318
## 0.1 0.0 15.41970 NaN 12.091419
## 0.1 0.1 11.35996 0.4915684 8.017335
## 0.1 0.2 11.27531 0.5015887 8.043442
## 0.1 0.3 11.20513 0.5110566 8.027647
## 0.1 0.4 11.26273 0.5110763 8.129069
## 0.1 0.5 11.49214 0.5022354 8.305879
## 0.1 0.6 11.71000 0.4927793 8.430194
## 0.1 0.7 11.90351 0.4878547 8.538342
## 0.1 0.8 12.11296 0.4819953 8.661963
## 0.1 0.9 12.33892 0.4753494 8.822856
## 0.1 1.0 12.55343 0.4696305 8.992569
## 0.2 0.0 15.41970 NaN 12.091419
## 0.2 0.1 11.29819 0.4971055 8.081225
## 0.2 0.2 11.34551 0.5072120 7.928544
## 0.2 0.3 11.28566 0.5174924 7.990192
## 0.2 0.4 11.30461 0.5220751 8.080592
## 0.2 0.5 11.46627 0.5177649 8.298248
## 0.2 0.6 11.62510 0.5109649 8.433006
## 0.2 0.7 11.82620 0.5055078 8.564932
## 0.2 0.8 12.00071 0.5017981 8.651009
## 0.2 0.9 12.22749 0.4948431 8.813655
## 0.2 1.0 12.42486 0.4892806 8.962422
## 0.3 0.0 15.41970 NaN 12.091419
## 0.3 0.1 11.27466 0.5009573 8.091070
## 0.3 0.2 11.47359 0.5099557 7.881453
## 0.3 0.3 11.47396 0.5213435 8.063961
## 0.3 0.4 11.49929 0.5262336 8.167894
## 0.3 0.5 11.65714 0.5240146 8.410613
## 0.3 0.6 11.79819 0.5187685 8.587430
## 0.3 0.7 11.98769 0.5141657 8.760497
## 0.3 0.8 12.19169 0.5096006 8.902773
## 0.3 0.9 12.39220 0.5049634 9.058682
## 0.3 1.0 12.59417 0.4995325 9.233599
## 0.4 0.0 15.41970 NaN 12.091419
## 0.4 0.1 11.25404 0.5056775 8.080373
## 0.4 0.2 11.63823 0.5113009 7.858957
## 0.4 0.3 11.71973 0.5236318 8.190912
## 0.4 0.4 11.76623 0.5286027 8.344216
## 0.4 0.5 11.94615 0.5268705 8.576430
## 0.4 0.6 12.09957 0.5230824 8.833440
## 0.4 0.7 12.29631 0.5190477 9.046553
## 0.4 0.8 12.51110 0.5146867 9.232418
## 0.4 0.9 12.72124 0.5100153 9.400000
## 0.4 1.0 12.93584 0.5044749 9.573351
## 0.5 0.0 15.41970 NaN 12.091419
## 0.5 0.1 11.24428 0.5077260 8.072662
## 0.5 0.2 11.80485 0.5129863 7.828495
## 0.5 0.3 12.00793 0.5251863 8.351482
## 0.5 0.4 12.09280 0.5299907 8.561475
## 0.5 0.5 12.30083 0.5288039 8.822076
## 0.5 0.6 12.47467 0.5261406 9.117000
## 0.5 0.7 12.68591 0.5224979 9.369244
## 0.5 0.8 12.90625 0.5186852 9.569376
## 0.5 0.9 13.13479 0.5136638 9.763197
## 0.5 1.0 13.35206 0.5085455 9.954409
## 0.6 0.0 15.41970 NaN 12.091419
## 0.6 0.1 11.24510 0.5086957 8.058453
## 0.6 0.2 12.00165 0.5143831 7.848258
## 0.6 0.3 12.34253 0.5257645 8.540670
## 0.6 0.4 12.47138 0.5308852 8.805267
## 0.6 0.5 12.71386 0.5299948 9.132140
## 0.6 0.6 12.91580 0.5278109 9.424597
## 0.6 0.7 13.14201 0.5245287 9.706237
## 0.6 0.8 13.37205 0.5213038 9.919503
## 0.6 0.9 13.61274 0.5164102 10.144165
## 0.6 1.0 13.83953 0.5114326 10.351894
## 0.7 0.0 15.41970 NaN 12.091419
## 0.7 0.1 11.24866 0.5092526 8.040058
## 0.7 0.2 12.21310 0.5152180 7.909587
## 0.7 0.3 12.71338 0.5259876 8.746622
## 0.7 0.4 12.89518 0.5311812 9.102461
## 0.7 0.5 13.17641 0.5305080 9.466376
## 0.7 0.6 13.41332 0.5285888 9.771724
## 0.7 0.7 13.65402 0.5258617 10.054410
## 0.7 0.8 13.89839 0.5229935 10.301344
## 0.7 0.9 14.14595 0.5185389 10.538146
## 0.7 1.0 14.38595 0.5135687 10.753672
## 0.8 0.0 15.41970 NaN 12.091419
## 0.8 0.1 11.25438 0.5092670 8.022478
## 0.8 0.2 12.43212 0.5159349 7.999328
## 0.8 0.3 13.10814 0.5258492 8.960724
## 0.8 0.4 13.35914 0.5307945 9.435246
## 0.8 0.5 13.67805 0.5304771 9.816573
## 0.8 0.6 13.95180 0.5287587 10.124423
## 0.8 0.7 14.20887 0.5266314 10.431117
## 0.8 0.8 14.47080 0.5239292 10.705678
## 0.8 0.9 14.72472 0.5199382 10.947697
## 0.8 1.0 14.97598 0.5152163 11.169986
## 0.9 0.0 15.41970 NaN 12.091419
## 0.9 0.1 11.26398 0.5091136 8.000174
## 0.9 0.2 12.67388 0.5162232 8.143464
## 0.9 0.3 13.51715 0.5256316 9.245476
## 0.9 0.4 13.85627 0.5302268 9.805245
## 0.9 0.5 14.20852 0.5303995 10.197555
## 0.9 0.6 14.51987 0.5288872 10.527307
## 0.9 0.7 14.79832 0.5270899 10.845497
## 0.9 0.8 15.07545 0.5245864 11.125941
## 0.9 0.9 15.33779 0.5210748 11.373539
## 0.9 1.0 15.60098 0.5165384 11.612292
## 1.0 0.0 15.41970 NaN 12.091419
## 1.0 0.1 11.27752 0.5088380 7.975301
## 1.0 0.2 12.93414 0.5159634 8.304355
## 1.0 0.3 13.93146 0.5254618 9.522955
## 1.0 0.4 14.37678 0.5295917 10.179910
## 1.0 0.5 14.76285 0.5301598 10.590262
## 1.0 0.6 15.11221 0.5288528 10.965464
## 1.0 0.7 15.41130 0.5273473 11.279058
## 1.0 0.8 15.70466 0.5250636 11.577120
## 1.0 0.9 15.98121 0.5216979 11.869047
## 1.0 1.0 16.25240 0.5175524 12.137198
##
## Rsquared was used to select the optimal model using the largest value.
## The final values used for the model were fraction = 0.4 and lambda = 0.7.
# Resampling profile of the elastic-net fit across the tuning grid.
plot(enetFit)
#' Score several fitted models on the same hold-out data
#'
#' Each model is used to predict `newdata`; the predictions are compared with
#' `obs` via `postResample()`.
#'
#' @param models A list of fitted models. Every element must work with
#'   `predict(model, newdata = ...)` and carry a `method` element, which is
#'   used as the name of its entry in the result.
#' @param newdata Predictor data passed to `predict()` for every model.
#' @param obs Observed outcomes corresponding to the rows of `newdata`.
#' @return A list with one `postResample()` result per model, in the order of
#'   `models`, named by each model's `method`.
multiResample <- function(models, newdata, obs) {
  # lapply() builds the result in one pass instead of growing a list and a
  # name vector element by element with a manual counter.
  res <- lapply(models, function(model) {
    pred <- predict(model, newdata = newdata)
    postResample(pred = pred, obs = obs)
  })
  names(res) <- vapply(models, function(model) model$method, character(1))
  res
}
# Compare the three penalised models on the held-out permeability data.
models <- list(ridgeFit, lassoFit, enetFit)
resampleResult <- multiResample(models, x_test, y_test)
resampleResult
## $ridge
## RMSE Rsquared MAE
## 17.4158936 0.5472254 12.8652027
##
## $lasso
## RMSE Rsquared MAE
## 12.9355400 0.3357307 8.6915998
##
## $enet
## RMSE Rsquared MAE
## 14.4535237 0.5122622 9.9411361
# Distribution of the permeability response.
hist(permeability, col = "lightyellow")

# Second data set: chemical manufacturing process.
library(AppliedPredictiveModeling)
data(ChemicalManufacturingProcess)
str(ChemicalManufacturingProcess)
## 'data.frame': 176 obs. of 58 variables:
## $ Yield : num 38 42.4 42 41.4 42.5 ...
## $ BiologicalMaterial01 : num 6.25 8.01 8.01 8.01 7.47 6.12 7.48 6.94 6.94 6.94 ...
## $ BiologicalMaterial02 : num 49.6 61 61 61 63.3 ...
## $ BiologicalMaterial03 : num 57 67.5 67.5 67.5 72.2 ...
## $ BiologicalMaterial04 : num 12.7 14.6 14.6 14.6 14 ...
## $ BiologicalMaterial05 : num 19.5 19.4 19.4 19.4 17.9 ...
## $ BiologicalMaterial06 : num 43.7 53.1 53.1 53.1 54.7 ...
## $ BiologicalMaterial07 : num 100 100 100 100 100 100 100 100 100 100 ...
## $ BiologicalMaterial08 : num 16.7 19 19 19 18.2 ...
## $ BiologicalMaterial09 : num 11.4 12.6 12.6 12.6 12.8 ...
## $ BiologicalMaterial10 : num 3.46 3.46 3.46 3.46 3.05 3.78 3.04 3.85 3.85 3.85 ...
## $ BiologicalMaterial11 : num 138 154 154 154 148 ...
## $ BiologicalMaterial12 : num 18.8 21.1 21.1 21.1 21.1 ...
## $ ManufacturingProcess01: num NA 0 0 0 10.7 12 11.5 12 12 12 ...
## $ ManufacturingProcess02: num NA 0 0 0 0 0 0 0 0 0 ...
## $ ManufacturingProcess03: num NA NA NA NA NA NA 1.56 1.55 1.56 1.55 ...
## $ ManufacturingProcess04: num NA 917 912 911 918 924 933 929 928 938 ...
## $ ManufacturingProcess05: num NA 1032 1004 1015 1028 ...
## $ ManufacturingProcess06: num NA 210 207 213 206 ...
## $ ManufacturingProcess07: num NA 177 178 177 178 178 177 178 177 177 ...
## $ ManufacturingProcess08: num NA 178 178 177 178 178 178 178 177 177 ...
## $ ManufacturingProcess09: num 43 46.6 45.1 44.9 45 ...
## $ ManufacturingProcess10: num NA NA NA NA NA NA 11.6 10.2 9.7 10.1 ...
## $ ManufacturingProcess11: num NA NA NA NA NA NA 11.5 11.3 11.1 10.2 ...
## $ ManufacturingProcess12: num NA 0 0 0 0 0 0 0 0 0 ...
## $ ManufacturingProcess13: num 35.5 34 34.8 34.8 34.6 34 32.4 33.6 33.9 34.3 ...
## $ ManufacturingProcess14: num 4898 4869 4878 4897 4992 ...
## $ ManufacturingProcess15: num 6108 6095 6087 6102 6233 ...
## $ ManufacturingProcess16: num 4682 4617 4617 4635 4733 ...
## $ ManufacturingProcess17: num 35.5 34 34.8 34.8 33.9 33.4 33.8 33.6 33.9 35.3 ...
## $ ManufacturingProcess18: num 4865 4867 4877 4872 4886 ...
## $ ManufacturingProcess19: num 6049 6097 6078 6073 6102 ...
## $ ManufacturingProcess20: num 4665 4621 4621 4611 4659 ...
## $ ManufacturingProcess21: num 0 0 0 0 -0.7 -0.6 1.4 0 0 1 ...
## $ ManufacturingProcess22: num NA 3 4 5 8 9 1 2 3 4 ...
## $ ManufacturingProcess23: num NA 0 1 2 4 1 1 2 3 1 ...
## $ ManufacturingProcess24: num NA 3 4 5 18 1 1 2 3 4 ...
## $ ManufacturingProcess25: num 4873 4869 4897 4892 4930 ...
## $ ManufacturingProcess26: num 6074 6107 6116 6111 6151 ...
## $ ManufacturingProcess27: num 4685 4630 4637 4630 4684 ...
## $ ManufacturingProcess28: num 10.7 11.2 11.1 11.1 11.3 11.4 11.2 11.1 11.3 11.4 ...
## $ ManufacturingProcess29: num 21 21.4 21.3 21.3 21.6 21.7 21.2 21.2 21.5 21.7 ...
## $ ManufacturingProcess30: num 9.9 9.9 9.4 9.4 9 10.1 11.2 10.9 10.5 9.8 ...
## $ ManufacturingProcess31: num 69.1 68.7 69.3 69.3 69.4 68.2 67.6 67.9 68 68.5 ...
## $ ManufacturingProcess32: num 156 169 173 171 171 173 159 161 160 164 ...
## $ ManufacturingProcess33: num 66 66 66 68 70 70 65 65 65 66 ...
## $ ManufacturingProcess34: num 2.4 2.6 2.6 2.5 2.5 2.5 2.5 2.5 2.5 2.5 ...
## $ ManufacturingProcess35: num 486 508 509 496 468 490 475 478 491 488 ...
## $ ManufacturingProcess36: num 0.019 0.019 0.018 0.018 0.017 0.018 0.019 0.019 0.019 0.019 ...
## $ ManufacturingProcess37: num 0.5 2 0.7 1.2 0.2 0.4 0.8 1 1.2 1.8 ...
## $ ManufacturingProcess38: num 3 2 2 2 2 2 2 2 3 3 ...
## $ ManufacturingProcess39: num 7.2 7.2 7.2 7.2 7.3 7.2 7.3 7.3 7.4 7.1 ...
## $ ManufacturingProcess40: num NA 0.1 0 0 0 0 0 0 0 0 ...
## $ ManufacturingProcess41: num NA 0.15 0 0 0 0 0 0 0 0 ...
## $ ManufacturingProcess42: num 11.6 11.1 12 10.6 11 11.5 11.7 11.4 11.4 11.3 ...
## $ ManufacturingProcess43: num 3 0.9 1 1.1 1.1 2.2 0.7 0.8 0.9 0.8 ...
## $ ManufacturingProcess44: num 1.8 1.9 1.8 1.8 1.7 1.8 2 2 1.9 1.9 ...
## $ ManufacturingProcess45: num 2.4 2.2 2.3 2.1 2.1 2 2.2 2.2 2.1 2.4 ...
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2020 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
# Visualise where values are missing in the raw data.
missmap(ChemicalManufacturingProcess, col = c("red", "lightgreen"))

# Split BEFORE imputing so that the imputation model is estimated from the
# training rows only; fitting it on every row would let information from the
# hold-out rows leak into the predictors used for training.
# Seeding here also makes the bagged-tree imputation reproducible (it was
# previously run before any seed was set). The partition itself is unchanged
# because createDataPartition() is still the first consumer of the seeded RNG.
set.seed(43)
trainRow <- createDataPartition(ChemicalManufacturingProcess$Yield, p = 0.8, list = FALSE)

# Column 1 is Yield (the response); the imputer is built from the predictors
# of the training rows only.
cmpImpute <- preProcess(ChemicalManufacturingProcess[trainRow, -1], method = "bagImpute")
cmpImpute

# Apply the training-derived imputer to every row, then split.
cmp <- predict(cmpImpute, ChemicalManufacturingProcess[, -1])
x_train <- cmp[trainRow, ]
y_train <- ChemicalManufacturingProcess$Yield[trainRow]
x_test <- cmp[-trainRow, ]
y_test <- ChemicalManufacturingProcess$Yield[-trainRow]
# Elastic net on the chemical-process data, tuned over the full
# fraction x lambda grid with cross-validation on centred/scaled predictors;
# here the model is selected by RMSE.
set.seed(43)
enetFit <- train(
  x = x_train,
  y = y_train,
  method = "enet",
  preProcess = c("center", "scale"),
  tuneGrid = expand.grid(
    .fraction = seq(0, 1, by = 0.1),
    .lambda = seq(0, 1, by = 0.1)
  ),
  metric = "RMSE",
  trControl = trainControl(method = "cv")
)
enetFit
## Elasticnet
##
## 144 samples
## 57 predictor
##
## Pre-processing: centered (57), scaled (57)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 129, 128, 129, 130, 128, 132, ...
## Resampling results across tuning parameters:
##
## lambda fraction RMSE Rsquared MAE
## 0.0 0.0 1.837579 NaN 1.5027873
## 0.0 0.1 1.153943 0.6317800 0.9496645
## 0.0 0.2 1.191060 0.6415414 0.9760286
## 0.0 0.3 1.558044 0.5769679 1.1124035
## 0.0 0.4 1.562566 0.5612475 1.1299345
## 0.0 0.5 1.829464 0.5492713 1.2209208
## 0.0 0.6 1.974977 0.4871427 1.2844440
## 0.0 0.7 2.186136 0.4600333 1.3630239
## 0.0 0.8 2.578256 0.4428648 1.4911103
## 0.0 0.9 3.092682 0.4281391 1.6573366
## 0.0 1.0 3.586090 0.4161821 1.8128366
## 0.1 0.0 1.837579 NaN 1.5027873
## 0.1 0.1 1.475418 0.5827890 1.2125458
## 0.1 0.2 1.228611 0.6227175 1.0102915
## 0.1 0.3 1.151070 0.6460293 0.9345442
## 0.1 0.4 1.172960 0.6396914 0.9529404
## 0.1 0.5 1.162016 0.6473431 0.9507579
## 0.1 0.6 1.197514 0.6424282 0.9778761
## 0.1 0.7 1.261270 0.6274234 1.0017188
## 0.1 0.8 1.395986 0.6021707 1.0516632
## 0.1 0.9 1.516766 0.5861941 1.0933444
## 0.1 1.0 1.647637 0.5741893 1.1348207
## 0.2 0.0 1.837579 NaN 1.5027873
## 0.2 0.1 1.508063 0.5667096 1.2368413
## 0.2 0.2 1.258721 0.6215993 1.0383243
## 0.2 0.3 1.165234 0.6344964 0.9515930
## 0.2 0.4 1.159147 0.6437809 0.9467027
## 0.2 0.5 1.182903 0.6403331 0.9595233
## 0.2 0.6 1.237500 0.6300853 0.9918226
## 0.2 0.7 1.270587 0.6290096 1.0095032
## 0.2 0.8 1.330289 0.6163989 1.0320641
## 0.2 0.9 1.434970 0.6002883 1.0700967
## 0.2 1.0 1.538323 0.5875734 1.1035631
## 0.3 0.0 1.837579 NaN 1.5027873
## 0.3 0.1 1.518075 0.5588743 1.2445998
## 0.3 0.2 1.269082 0.6208368 1.0475042
## 0.3 0.3 1.169500 0.6295342 0.9563785
## 0.3 0.4 1.145713 0.6472907 0.9333795
## 0.3 0.5 1.190634 0.6392433 0.9645697
## 0.3 0.6 1.235301 0.6313341 0.9917801
## 0.3 0.7 1.278239 0.6303154 1.0159940
## 0.3 0.8 1.330843 0.6215903 1.0370288
## 0.3 0.9 1.373277 0.6146194 1.0537659
## 0.3 1.0 1.467398 0.6006674 1.0872921
## 0.4 0.0 1.837579 NaN 1.5027873
## 0.4 0.1 1.520080 0.5553824 1.2464751
## 0.4 0.2 1.271637 0.6199460 1.0497132
## 0.4 0.3 1.167954 0.6277479 0.9556347
## 0.4 0.4 1.150403 0.6425762 0.9284207
## 0.4 0.5 1.187015 0.6420124 0.9635872
## 0.4 0.6 1.223137 0.6378095 0.9906428
## 0.4 0.7 1.269705 0.6347668 1.0188585
## 0.4 0.8 1.313225 0.6299848 1.0363129
## 0.4 0.9 1.367931 0.6226255 1.0578202
## 0.4 1.0 1.428935 0.6140918 1.0829621
## 0.5 0.0 1.837579 NaN 1.5027873
## 0.5 0.1 1.518919 0.5530349 1.2457213
## 0.5 0.2 1.270707 0.6188545 1.0486297
## 0.5 0.3 1.168377 0.6249478 0.9556412
## 0.5 0.4 1.159409 0.6374973 0.9323243
## 0.5 0.5 1.193498 0.6417474 0.9684132
## 0.5 0.6 1.232305 0.6397542 0.9950645
## 0.5 0.7 1.266117 0.6406088 1.0222098
## 0.5 0.8 1.305402 0.6387772 1.0408881
## 0.5 0.9 1.356965 0.6336991 1.0608128
## 0.5 1.0 1.415950 0.6266753 1.0881101
## 0.6 0.0 1.837579 NaN 1.5027873
## 0.6 0.1 1.516084 0.5513297 1.2433701
## 0.6 0.2 1.268186 0.6176485 1.0457533
## 0.6 0.3 1.168988 0.6227975 0.9544037
## 0.6 0.4 1.170894 0.6327802 0.9411958
## 0.6 0.5 1.206688 0.6397143 0.9762511
## 0.6 0.6 1.249022 0.6393523 1.0025421
## 0.6 0.7 1.280771 0.6424090 1.0316929
## 0.6 0.8 1.319221 0.6427308 1.0521116
## 0.6 0.9 1.369020 0.6398181 1.0753518
## 0.6 1.0 1.424530 0.6354244 1.1029898
## 0.7 0.0 1.837579 NaN 1.5027873
## 0.7 0.1 1.512405 0.5500374 1.2401590
## 0.7 0.2 1.264887 0.6163458 1.0419766
## 0.7 0.3 1.170630 0.6204189 0.9534808
## 0.7 0.4 1.184041 0.6284292 0.9505349
## 0.7 0.5 1.223707 0.6367814 0.9851533
## 0.7 0.6 1.272073 0.6366965 1.0134607
## 0.7 0.7 1.309008 0.6400479 1.0448056
## 0.7 0.8 1.349375 0.6414351 1.0699311
## 0.7 0.9 1.398954 0.6400939 1.0949608
## 0.7 1.0 1.451618 0.6382590 1.1235148
## 0.8 0.0 1.837579 NaN 1.5027873
## 0.8 0.1 1.508176 0.5489991 1.2364588
## 0.8 0.2 1.260845 0.6150928 1.0374858
## 0.8 0.3 1.172634 0.6180908 0.9535600
## 0.8 0.4 1.197917 0.6245533 0.9598211
## 0.8 0.5 1.243611 0.6335187 0.9954982
## 0.8 0.6 1.300082 0.6329511 1.0380407
## 0.8 0.7 1.345932 0.6355565 1.0713355
## 0.8 0.8 1.390518 0.6370516 1.1007236
## 0.8 0.9 1.441642 0.6365300 1.1255091
## 0.8 1.0 1.493495 0.6361851 1.1513097
## 0.9 0.0 1.837579 NaN 1.5027873
## 0.9 0.1 1.503832 0.5478839 1.2326714
## 0.9 0.2 1.257085 0.6137412 1.0330921
## 0.9 0.3 1.175063 0.6159498 0.9534287
## 0.9 0.4 1.211042 0.6215597 0.9680753
## 0.9 0.5 1.265767 0.6300276 1.0113744
## 0.9 0.6 1.331053 0.6288177 1.0666057
## 0.9 0.7 1.387558 0.6307618 1.1083125
## 0.9 0.8 1.439188 0.6314905 1.1436082
## 0.9 0.9 1.493023 0.6314098 1.1729356
## 0.9 1.0 1.545960 0.6317086 1.2040185
## 1.0 0.0 1.837579 NaN 1.5027873
## 1.0 0.1 1.499407 0.5468504 1.2287872
## 1.0 0.2 1.253831 0.6122372 1.0288687
## 1.0 0.3 1.177862 0.6141065 0.9532678
## 1.0 0.4 1.224033 0.6190712 0.9760137
## 1.0 0.5 1.289279 0.6265507 1.0299626
## 1.0 0.6 1.364109 0.6247438 1.0947809
## 1.0 0.7 1.430693 0.6266091 1.1432180
## 1.0 0.8 1.492621 0.6260592 1.1876569
## 1.0 0.9 1.550088 0.6261552 1.2232392
## 1.0 1.0 1.605355 0.6267026 1.2614448
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were fraction = 0.4 and lambda = 0.3.
# Resampling profile across the tuning grid.
plot(enetFit)

# Hold-out performance of the selected elastic-net model.
enet_Pred <- predict(enetFit, newdata = x_test)
predResult <- postResample(pred = enet_Pred, obs = y_test)
predResult
## RMSE Rsquared MAE
## 1.2777021 0.5180167 1.0508293
# Coefficients of the final elastic-net model at the selected fraction.
coeffs <- predict.enet(
  enetFit$finalModel,
  s = enetFit$bestTune[1, "fraction"],
  type = "coef",
  mode = "fraction"
)$coefficients
coeffs
## BiologicalMaterial01 BiologicalMaterial02 BiologicalMaterial03
## 0.000000000 0.090381195 0.107121492
## BiologicalMaterial04 BiologicalMaterial05 BiologicalMaterial06
## 0.000000000 0.000000000 0.185876248
## BiologicalMaterial07 BiologicalMaterial08 BiologicalMaterial09
## -0.009778043 0.000000000 0.000000000
## BiologicalMaterial10 BiologicalMaterial11 BiologicalMaterial12
## 0.000000000 0.000000000 0.000000000
## ManufacturingProcess01 ManufacturingProcess02 ManufacturingProcess03
## 0.000000000 0.000000000 0.000000000
## ManufacturingProcess04 ManufacturingProcess05 ManufacturingProcess06
## 0.000000000 0.000000000 0.066438839
## ManufacturingProcess07 ManufacturingProcess08 ManufacturingProcess09
## 0.000000000 0.000000000 0.321989134
## ManufacturingProcess10 ManufacturingProcess11 ManufacturingProcess12
## 0.000000000 0.162601998 0.000000000
## ManufacturingProcess13 ManufacturingProcess14 ManufacturingProcess15
## -0.250483963 0.000000000 0.002777623
## ManufacturingProcess16 ManufacturingProcess17 ManufacturingProcess18
## 0.000000000 -0.237350284 0.000000000
## ManufacturingProcess19 ManufacturingProcess20 ManufacturingProcess21
## 0.000000000 0.000000000 0.000000000
## ManufacturingProcess22 ManufacturingProcess23 ManufacturingProcess24
## 0.000000000 0.000000000 0.000000000
## ManufacturingProcess25 ManufacturingProcess26 ManufacturingProcess27
## 0.000000000 0.000000000 0.000000000
## ManufacturingProcess28 ManufacturingProcess29 ManufacturingProcess30
## 0.000000000 0.000000000 0.015616362
## ManufacturingProcess31 ManufacturingProcess32 ManufacturingProcess33
## 0.000000000 0.597941780 0.027730790
## ManufacturingProcess34 ManufacturingProcess35 ManufacturingProcess36
## 0.073053036 0.000000000 -0.341087523
## ManufacturingProcess37 ManufacturingProcess38 ManufacturingProcess39
## -0.084284710 0.000000000 0.105507516
## ManufacturingProcess40 ManufacturingProcess41 ManufacturingProcess42
## 0.000000000 0.000000000 0.015801489
## ManufacturingProcess43 ManufacturingProcess44 ManufacturingProcess45
## 0.000000000 0.052531419 0.000000000
# Rank the predictors kept by the model (non-zero coefficients) by the
# absolute size of their coefficient, largest first.
coeffs.sorted <- abs(coeffs)
coeffs.sorted <- coeffs.sorted[coeffs.sorted > 0]
# TRUE rather than T: T is an ordinary variable that can be reassigned.
(coeffs.sorted <- sort(coeffs.sorted, decreasing = TRUE))
## ManufacturingProcess32 ManufacturingProcess36 ManufacturingProcess09
## 0.597941780 0.341087523 0.321989134
## ManufacturingProcess13 ManufacturingProcess17 BiologicalMaterial06
## 0.250483963 0.237350284 0.185876248
## ManufacturingProcess11 BiologicalMaterial03 ManufacturingProcess39
## 0.162601998 0.107121492 0.105507516
## BiologicalMaterial02 ManufacturingProcess37 ManufacturingProcess34
## 0.090381195 0.084284710 0.073053036
## ManufacturingProcess06 ManufacturingProcess44 ManufacturingProcess33
## 0.066438839 0.052531419 0.027730790
## ManufacturingProcess42 ManufacturingProcess30 BiologicalMaterial07
## 0.015801489 0.015616362 0.009778043
## ManufacturingProcess15
## 0.002777623
# Both the varImp and caret packages export a function named varImp(), and
# which one is found depends on attach order. Qualify the call so the caret
# version is always the one applied to the train object.
(temp <- caret::varImp(enetFit))
## loess r-squared variable importance
##
## only 20 most important variables shown (out of 57)
##
## Overall
## ManufacturingProcess32 100.00
## BiologicalMaterial06 86.37
## ManufacturingProcess13 81.45
## ManufacturingProcess36 79.55
## BiologicalMaterial02 72.25
## BiologicalMaterial03 71.20
## BiologicalMaterial12 69.06
## ManufacturingProcess17 66.66
## ManufacturingProcess31 65.89
## ManufacturingProcess09 64.72
## ManufacturingProcess06 53.51
## ManufacturingProcess33 52.91
## BiologicalMaterial11 51.56
## BiologicalMaterial04 49.23
## ManufacturingProcess11 47.70
## BiologicalMaterial08 45.57
## ManufacturingProcess30 43.23
## BiologicalMaterial01 39.25
## ManufacturingProcess29 38.88
## ManufacturingProcess02 32.81
# Signed coefficients of the retained ManufacturingProcess predictors, kept in
# the order of decreasing absolute size.
coeffs_mp <- coeffs[grep("ManufacturingProcess", names(coeffs.sorted), value = TRUE)]

# Process predictors with a positive coefficient.
coeffs_mp[coeffs_mp > 0]
## ManufacturingProcess32 ManufacturingProcess09 ManufacturingProcess11
## 0.597941780 0.321989134 0.162601998
## ManufacturingProcess39 ManufacturingProcess34 ManufacturingProcess06
## 0.105507516 0.073053036 0.066438839
## ManufacturingProcess44 ManufacturingProcess33 ManufacturingProcess42
## 0.052531419 0.027730790 0.015801489
## ManufacturingProcess30 ManufacturingProcess15
## 0.015616362 0.002777623
# Process predictors with a negative coefficient.
coeffs_mp[coeffs_mp < 0]
## ManufacturingProcess36 ManufacturingProcess13 ManufacturingProcess17
## -0.34108752 -0.25048396 -0.23735028
## ManufacturingProcess37
## -0.08428471