#Access the required library
#Required packages
library(AppliedPredictiveModeling)
library(lattice)
library(caret)
## Loading required package: ggplot2
library(corrplot)
## corrplot 0.95 loaded
library(e1071)
##
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
##
## element
library(pls)
##
## Attaching package: 'pls'
## The following object is masked from 'package:corrplot':
##
## corrplot
## The following object is masked from 'package:caret':
##
## R2
## The following object is masked from 'package:stats':
##
## loadings
library(elasticnet)
## Loading required package: lars
## Loaded lars 1.3
# Load the solubility data sets that ship with AppliedPredictiveModeling.
data(solubility)
# Documentation: see the "solubility" help page of AppliedPredictiveModeling (?solubility).
# Inspect the training data: structure of the outcome and size of the predictor table.
str(solTrainY)
## num [1:951] -3.97 -3.98 -3.99 -4 -4.06 -4.08 -4.08 -4.1 -4.1 -4.11 ...
dim(solTrainX)
## [1] 951 228
# Inspect the test data: first outcome values and first predictor rows.
head(solTestY)
## [1] 0.93 0.85 0.81 0.74 0.61 0.58
head(solTestX)
## FP001 FP002 FP003 FP004 FP005 FP006 FP007 FP008 FP009 FP010 FP011 FP012
## 20 1 0 0 1 0 0 0 0 1 1 0 0
## 21 1 0 1 1 0 1 0 0 0 0 1 1
## 23 0 1 0 0 1 0 0 0 0 1 0 0
## 25 0 0 1 0 0 1 0 0 0 0 0 0
## 28 1 1 0 1 1 1 0 1 0 0 1 0
## 31 1 0 0 1 0 0 0 0 0 0 0 1
## FP013 FP014 FP015 FP016 FP017 FP018 FP019 FP020 FP021 FP022 FP023 FP024
## 20 0 0 1 0 0 0 0 0 1 0 0 0
## 21 0 0 1 1 0 0 0 0 0 0 0 0
## 23 0 0 0 0 0 0 0 0 0 0 0 0
## 25 0 0 1 0 0 0 0 0 0 0 0 0
## 28 0 0 1 0 0 0 1 0 0 0 0 1
## 31 0 0 1 0 0 0 0 1 0 0 0 0
## FP025 FP026 FP027 FP028 FP029 FP030 FP031 FP032 FP033 FP034 FP035 FP036
## 20 1 0 0 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 0 0 0 0 0 0
## 23 0 0 0 0 0 0 0 1 1 0 0 0
## 25 0 0 1 1 0 1 0 0 0 0 0 0
## 28 0 0 0 0 0 0 0 0 0 0 0 0
## 31 0 0 0 0 0 0 0 0 0 0 0 0
## FP037 FP038 FP039 FP040 FP041 FP042 FP043 FP044 FP045 FP046 FP047 FP048
## 20 0 1 0 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 0 0 0 0 0 0
## 23 0 0 0 0 0 0 0 0 0 1 0 0
## 25 0 0 0 0 0 0 0 0 0 0 0 0
## 28 0 0 0 1 0 0 0 0 0 0 1 0
## 31 0 0 0 0 0 0 0 0 0 0 0 0
## FP049 FP050 FP051 FP052 FP053 FP054 FP055 FP056 FP057 FP058 FP059 FP060
## 20 0 0 0 0 0 0 0 0 0 0 0 1
## 21 0 0 0 0 0 0 0 0 0 0 0 1
## 23 0 0 0 0 0 0 1 0 0 0 0 1
## 25 0 0 0 0 0 0 0 0 0 0 0 0
## 28 0 0 0 0 0 0 0 0 0 0 0 0
## 31 0 0 0 0 0 0 0 0 0 0 0 1
## FP061 FP062 FP063 FP064 FP065 FP066 FP067 FP068 FP069 FP070 FP071 FP072
## 20 1 1 0 1 0 0 0 0 0 0 0 1
## 21 1 1 1 1 0 1 1 1 0 0 0 1
## 23 1 0 0 0 1 0 0 0 0 0 0 1
## 25 0 0 1 0 0 1 1 1 0 0 0 0
## 28 0 1 1 0 0 0 1 1 0 0 0 1
## 31 1 1 0 0 0 1 0 0 0 0 0 1
## FP073 FP074 FP075 FP076 FP077 FP078 FP079 FP080 FP081 FP082 FP083 FP084
## 20 1 0 0 0 0 0 0 1 0 0 0 0
## 21 0 0 1 0 0 0 0 1 0 0 1 0
## 23 1 1 0 0 0 1 1 0 0 1 0 0
## 25 0 0 1 0 1 0 1 1 1 0 1 1
## 28 0 0 0 0 0 0 1 0 0 1 1 1
## 31 0 0 0 0 0 0 0 0 0 0 0 0
## FP085 FP086 FP087 FP088 FP089 FP090 FP091 FP092 FP093 FP094 FP095 FP096
## 20 0 0 0 1 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 0 0 0 1 0 0
## 23 0 0 1 0 0 0 0 0 0 0 1 0
## 25 0 1 1 0 0 1 1 0 1 0 1 0
## 28 0 0 1 0 0 0 1 0 0 0 0 0
## 31 0 0 0 0 0 0 0 0 0 0 0 0
## FP097 FP098 FP099 FP100 FP101 FP102 FP103 FP104 FP105 FP106 FP107 FP108
## 20 0 1 0 0 0 0 0 0 0 0 1 0
## 21 0 1 0 1 0 0 0 0 0 0 0 1
## 23 0 0 0 0 0 0 0 0 0 0 0 0
## 25 0 0 0 0 1 0 1 1 0 0 0 0
## 28 0 0 0 1 1 0 0 0 0 0 0 0
## 31 0 1 0 0 0 0 0 0 0 0 0 1
## FP109 FP110 FP111 FP112 FP113 FP114 FP115 FP116 FP117 FP118 FP119 FP120
## 20 0 0 0 0 0 0 0 0 0 1 0 0
## 21 0 1 1 0 1 0 0 0 0 0 0 1
## 23 0 0 0 0 0 0 0 0 0 0 1 0
## 25 0 0 0 0 0 0 0 1 0 1 0 0
## 28 0 1 0 0 0 0 0 1 0 0 0 0
## 31 0 0 0 0 0 0 0 0 0 0 0 0
## FP121 FP122 FP123 FP124 FP125 FP126 FP127 FP128 FP129 FP130 FP131 FP132
## 20 0 0 0 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 0 0 0 0 0 1
## 23 0 0 0 0 1 0 0 0 0 0 0 0
## 25 0 1 0 0 0 0 0 0 0 0 0 0
## 28 0 0 0 0 0 1 0 1 0 0 1 1
## 31 0 0 0 1 0 0 0 0 0 0 0 0
## FP133 FP134 FP135 FP136 FP137 FP138 FP139 FP140 FP141 FP142 FP143 FP144
## 20 0 0 0 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 0 0 0 0 0 0
## 23 0 0 0 0 0 0 0 0 0 0 0 0
## 25 0 0 1 0 0 1 0 0 0 1 0 1
## 28 0 0 0 1 0 0 0 0 0 0 0 0
## 31 0 0 0 0 0 0 0 0 0 0 0 0
## FP145 FP146 FP147 FP148 FP149 FP150 FP151 FP152 FP153 FP154 FP155 FP156
## 20 0 0 0 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 0 0 0 0 0 0
## 23 0 0 0 1 0 0 0 0 0 0 0 0
## 25 0 0 0 0 0 1 0 0 0 0 0 0
## 28 0 0 0 0 0 0 0 0 0 0 0 0
## 31 0 0 0 0 0 0 0 0 0 0 0 0
## FP157 FP158 FP159 FP160 FP161 FP162 FP163 FP164 FP165 FP166 FP167 FP168
## 20 0 0 0 0 0 0 1 0 1 0 1 0
## 21 0 0 0 0 0 0 1 0 1 0 1 0
## 23 0 0 0 0 0 1 0 1 0 1 0 1
## 25 0 0 0 0 1 0 0 0 0 0 0 0
## 28 0 0 0 0 0 1 1 1 1 1 1 1
## 31 0 0 0 0 0 0 1 0 0 0 0 0
## FP169 FP170 FP171 FP172 FP173 FP174 FP175 FP176 FP177 FP178 FP179 FP180
## 20 0 0 0 0 0 0 0 0 0 0 0 1
## 21 0 0 0 0 0 0 0 0 0 0 0 0
## 23 1 0 0 0 0 0 0 0 0 0 0 0
## 25 0 1 0 0 1 0 0 0 0 0 0 0
## 28 0 0 0 0 0 0 0 1 0 0 0 0
## 31 0 0 0 0 0 0 0 0 0 0 0 0
## FP181 FP182 FP183 FP184 FP185 FP186 FP187 FP188 FP189 FP190 FP191 FP192
## 20 0 0 0 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 0 0 0 0 0 0
## 23 0 0 1 0 0 0 0 0 0 0 0 0
## 25 0 0 0 0 0 0 0 0 0 0 0 0
## 28 0 0 0 0 0 0 0 0 0 0 0 0
## 31 0 0 0 0 0 1 0 0 0 0 1 0
## FP193 FP194 FP195 FP196 FP197 FP198 FP199 FP200 FP201 FP202 FP203 FP204
## 20 0 0 0 0 0 0 0 0 0 0 0 0
## 21 0 0 0 0 0 0 1 0 0 0 0 0
## 23 0 0 0 0 0 0 0 0 0 0 0 0
## 25 0 0 1 0 0 0 0 0 0 0 0 0
## 28 0 0 0 0 0 0 0 0 0 0 0 0
## 31 0 0 0 0 0 0 0 0 0 0 0 0
## FP205 FP206 FP207 FP208 MolWeight NumAtoms NumNonHAtoms NumBonds
## 20 0 0 0 0 94.50 8 5 7
## 21 0 0 0 0 89.11 13 6 12
## 23 1 0 0 0 110.12 14 8 14
## 25 0 0 0 0 100.19 19 7 19
## 28 0 0 0 0 122.14 15 9 15
## 31 0 0 0 0 60.06 8 4 7
## NumNonHBonds NumMultBonds NumRotBonds NumDblBonds NumAromaticBonds
## 20 4 1 0 1 0
## 21 5 1 2 1 0
## 23 8 6 0 0 6
## 25 7 0 0 0 0
## 28 9 7 1 1 6
## 31 3 1 1 1 0
## NumHydrogen NumCarbon NumNitrogen NumOxygen NumSulfer NumChlorine NumHalogen
## 20 3 2 0 2 0 1 1
## 21 7 3 1 2 0 0 0
## 23 6 6 0 2 0 0 0
## 25 12 5 2 0 0 0 0
## 28 6 6 2 1 0 0 0
## 31 4 2 0 2 0 0 0
## NumRings HydrophilicFactor SurfaceArea1 SurfaceArea2
## 20 0 0.492 37.30 37.30
## 21 0 1.317 52.32 52.32
## 23 1 0.846 40.46 40.46
## 25 1 0.984 24.06 24.06
## 28 1 0.843 55.98 55.98
## 31 0 -0.431 26.30 26.30
# Stage 1: Data pre-processing
### Exploratory plots of the training data
# (a) Log solubility plotted against molecular weight.
plot(
  solTrainY ~ solTrainX$MolWeight,
  main = "(a)",
  xlab = "Molecular Weight",
  ylab = "Solubility (log)",
  col = "blue"
)
# Simple linear regression of solubility on molecular weight.
fit <- lm(solTrainY ~ solTrainX$MolWeight)
summary(fit)
##
## Call:
## lm(formula = solTrainY ~ solTrainX$MolWeight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.5310 -0.8068 0.2151 0.9793 7.2695
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0664486 0.1182195 -0.562 0.574
## solTrainX$MolWeight -0.0131519 0.0005274 -24.936 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.592 on 949 degrees of freedom
## Multiple R-squared: 0.3958, Adjusted R-squared: 0.3952
## F-statistic: 621.8 on 1 and 949 DF, p-value: < 2.2e-16
# Overlay the fitted regression line on the scatter plot drawn above.
abline(fit, lwd = 2, col = 2)
### Pearson correlation test for the relationship
### between solubility and molecular weight
cor.test(x = solTrainY, y = solTrainX$MolWeight, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: solTrainY and solTrainX$MolWeight
## t = -24.936, df = 949, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.6660933 -0.5891573
## sample estimates:
## cor
## -0.6291639
# Log solubility against the number of rotatable bonds
# (type "p" draws the points, "g" adds a reference grid).
xyplot(
  solTrainY ~ solTrainX$NumRotBonds,
  xlab = "Number of Rotatable Bonds",
  ylab = "Solubility (log)",
  type = c("p", "g")
)
# bwplot() draws box-and-whisker plots. Here solubility is split by whether
# the binary predictor in column 100 of solTrainX equals 1 ("present") or not.
# NOTE(review): column 100 is presumably fingerprint FP100 - confirm.
bwplot(
  solTrainY ~ ifelse(solTrainX[, 100] == 1,
                     "structure present",
                     "structure absent"),
  main = "(b)",
  ylab = "Solubility (log)",
  horizontal = FALSE
)
# The examples above showed that strong correlations exist among the
# predictors, so how do we deal with significant correlations?
## From here on we use solTrainXtrans, the training-set predictors after the
## transformations for skewness (described as also centered/scaled - verify).
# NOTE: despite its name, notFingerprints holds the indices of the columns
# whose names contain "FP"; indexing with -notFingerprints keeps the rest.
notFingerprints <- grep(pattern = "FP", x = names(solTrainXtrans))
# Scatter plots of each non-fingerprint predictor against solubility, with a
# grid, the points and a smoother in every panel.
featurePlot(
  x = solTrainXtrans[, -notFingerprints],
  y = solTrainY,
  type = c("g", "p", "smooth"),
  between = list(x = 1, y = 1),
  labels = rep("", 2)
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0059402
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.57302
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 2.4347e-15
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.32158
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0059402
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.57302
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.32158
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0059402
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.57302
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.32158
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0059402
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.57302
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.32158
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0059402
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.57302
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 2.4347e-15
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.32158
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0035397
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.46037
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 2.9968e-15
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.20869
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0035397
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.46037
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.20869
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0035397
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.46037
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.20869
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0035397
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.46037
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 6.0399e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.20869
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0035397
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.46037
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 2.9968e-15
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.20869
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 5.76e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 5.76e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 5.76e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 5.76e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 5.76e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
# corrplot() is called with its namespace because pls, attached after
# corrplot, masks the corrplot object.
# Correlation matrix plot for the non-fingerprint (continuous) predictors,
# with variables ordered by hierarchical clustering.
corrplot::corrplot(
  corr = cor(solTrainXtrans[, -notFingerprints]),
  order = "hclust",
  tl.cex = 0.8
)
# Flag the continuous predictors involved in pairwise correlations above 0.9
# and re-draw the correlation plot without them. Nothing is removed from
# solTrainXtrans itself; tooHigh indexes into the continuous-predictor subset.
contPredictors <- solTrainXtrans[, -notFingerprints]
tooHigh <- findCorrelation(cor(contPredictors), cutoff = 0.9)
# Select the columns to keep by position instead of using -tooHigh: if
# tooHigh were empty, x[, -integer(0)] would return zero columns, not all.
keepCols <- setdiff(seq_len(ncol(contPredictors)), tooHigh)
corrplot::corrplot(
  cor(contPredictors[, keepCols, drop = FALSE]),
  order = "hclust",
  tl.cex = 0.8
)
# Identify near-zero-variance predictors. This only reports their column
# indices; nothing is dropped from solTrainXtrans here.
nearZeroVar(x = solTrainXtrans)
## [1] 154 199 200
# Three predictors have near-zero variance: columns 154, 199 and 200.
# Skewness of each non-fingerprint predictor (skewness() comes from e1071).
# The result is one value per column, named by column.
vapply(solTrainXtrans[, -notFingerprints], skewness, numeric(1))
## MolWeight NumAtoms NumNonHAtoms NumBonds
## -0.0002162255 -0.0713055864 -0.0555982369 -0.1881635919
## NumNonHBonds NumMultBonds NumRotBonds NumDblBonds
## 0.0432437801 -0.0946655258 0.0973568321 0.1496862360
## NumAromaticBonds NumHydrogen NumCarbon NumNitrogen
## -0.1463815894 -0.0407015938 0.0616443828 0.4262727429
## NumOxygen NumSulfer NumChlorine NumHalogen
## 0.1870352249 2.2707457390 1.4673236666 1.0331764976
## NumRings HydrophilicFactor SurfaceArea1 SurfaceArea2
## 0.0055727736 0.1003040295 -0.1316118434 -0.1681548297
# Box-Cox transformation
# BoxCoxTrans() estimates one lambda for a single numeric vector. Passing it
# the whole matrix pooled all 951 x 20 = 19020 values into one sample, and no
# lambda could be estimated. preProcess(method = "BoxCox") estimates the
# transformation column by column instead.
# NOTE: the recorded output that follows this block was produced by the
# earlier matrix-wide BoxCoxTrans() call; re-run the script to refresh it.
Original <- as.matrix(solTrainXtrans[, -notFingerprints])
solTrainXtransBoxCox <- preProcess(Original, method = "BoxCox")
solTrainXtransBoxCox
## Box-Cox Transformation
##
## 19020 data points used to estimate Lambda
##
## Input data summary:
## MolWeight NumAtoms NumNonHAtoms NumBonds
## Min. :3.852 Min. :1.792 Min. :1.099 Min. :1.609
## 1st Qu.:4.817 1st Qu.:2.890 1st Qu.:2.197 1st Qu.:2.890
## Median :5.194 Median :3.135 Median :2.565 Median :3.178
## Mean :5.199 Mean :3.174 Mean :2.549 Mean :3.176
## 3rd Qu.:5.581 3rd Qu.:3.466 3rd Qu.:2.890 3rd Qu.:3.481
## Max. :6.503 Max. :4.554 Max. :3.871 Max. :4.585
## NumNonHBonds NumMultBonds NumRotBonds NumDblBonds
## Min. :0.7435 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:2.7592 1st Qu.:0.7988 1st Qu.:0.0000 1st Qu.:0.0000
## Median :3.3514 Median :2.9448 Median :1.0986 Median :0.5671
## Mean :3.3623 Mean :2.5791 Mean :0.9256 Mean :0.3981
## 3rd Qu.:4.0099 3rd Qu.:4.0237 3rd Qu.:1.4979 3rd Qu.:0.8045
## Max. :5.9770 Max. :6.7030 Max. :2.8332 Max. :1.1880
## NumAromaticBonds NumHydrogen NumCarbon NumNitrogen
## Min. :0.000 Min. :0.000 Min. :0.7705 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:2.887 1st Qu.:2.6426 1st Qu.:0.0000
## Median :1.946 Median :3.691 Median :3.3175 Median :0.0000
## Mean :1.287 Mean :3.696 Mean :3.3240 Mean :0.2308
## 3rd Qu.:1.946 3rd Qu.:4.465 3rd Qu.:3.8622 3rd Qu.:0.4568
## Max. :3.258 Max. :7.314 Max. :6.2678 Max. :0.7079
## NumOxygen NumSulfer NumChlorine NumHalogen
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.6931 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.7470 Mean :0.04975 Mean :0.09098 Mean :0.1201
## 3rd Qu.:1.0986 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.3750
## Max. :2.6391 Max. :0.48000 Max. :0.49587 Max. :0.4959
## NumRings HydrophilicFactor SurfaceArea1 SurfaceArea2
## Min. :0.0000 Min. :-2.8413 Min. : 0.000 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.:-1.2510 1st Qu.: 3.837 1st Qu.: 4.159
## Median :0.6931 Median :-0.3630 Median : 7.258 Median : 7.760
## Mean :0.7341 Mean :-0.4528 Mean : 6.708 Mean : 7.081
## 3rd Qu.:1.0986 3rd Qu.: 0.2799 3rd Qu.: 9.854 3rd Qu.:10.500
## Max. :2.0794 Max. : 3.5338 Max. :23.020 Max. :23.020
##
## Lambda could not be estimated; no transformation is applied
# End of data preprocessing. From here on we work with the transformed predictor matrix solTrainXtrans instead of the original training data solTrainX.
# Model building
# Linear regression
### Build one resampling specification to share across models. The fold
### assignments are created explicitly instead of relying on the random
### number seed being set to identical values before each fit.
set.seed(100)
# createFolds() with returnTrain = TRUE returns, for each fold, the row
# indices used for training; k = 10 is the default number of folds.
indx <- createFolds(y = solTrainY, k = 10, returnTrain = TRUE)
# Cross-validation control object that makes train() use exactly these folds.
ctrl <- trainControl(index = indx, method = "cv")
### Linear regression model with all of the predictors. This will
### produce some warnings that a 'rank-deficient fit may be
### misleading'. This is related to the predictors being so highly
### correlated that some of the math has broken down.
set.seed(100)
lmTune0 <- train(x = solTrainXtrans, y = solTrainY,
method = "lm",
trControl = ctrl)
## Warning in predict.lm(modelFit, newdata): prediction from rank-deficient fit;
## attr(*, "non-estim") has doubtful cases
## Warning in predict.lm(modelFit, newdata): prediction from rank-deficient fit;
## attr(*, "non-estim") has doubtful cases
# Print the cross-validated performance of the full-predictor linear model
lmTune0
## Linear Regression
##
## 951 samples
## 228 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.7170016 0.8792751 0.5298775
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Coefficient table (estimates, standard errors, t and p values) and
# overall fit statistics for the linear model on all predictors
summary(lmTune0) # provide regression coefficients
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.75620 -0.28304 0.01165 0.30030 1.54887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.431e+00 2.162e+00 1.124 0.261303
## FP001 3.594e-01 3.185e-01 1.128 0.259635
## FP002 1.456e-01 2.637e-01 0.552 0.580960
## FP003 -3.969e-02 1.314e-01 -0.302 0.762617
## FP004 -3.049e-01 1.371e-01 -2.223 0.026520 *
## FP005 2.837e+00 9.598e-01 2.956 0.003223 **
## FP006 -6.886e-02 2.041e-01 -0.337 0.735917
## FP007 4.044e-02 1.152e-01 0.351 0.725643
## FP008 1.121e-01 1.636e-01 0.685 0.493331
## FP009 -8.242e-01 8.395e-01 -0.982 0.326536
## FP010 4.193e-01 3.136e-01 1.337 0.181579
## FP011 5.158e-02 2.198e-01 0.235 0.814503
## FP012 -1.346e-02 1.611e-01 -0.084 0.933452
## FP013 -4.519e-01 5.473e-01 -0.826 0.409311
## FP014 3.281e-01 4.550e-01 0.721 0.471044
## FP015 -1.839e-01 1.521e-01 -1.209 0.226971
## FP016 -1.367e-01 1.548e-01 -0.883 0.377340
## FP017 -1.704e-01 1.386e-01 -1.230 0.219187
## FP018 -3.824e-01 2.388e-01 -1.602 0.109655
## FP019 -3.131e-01 3.863e-01 -0.811 0.417862
## FP020 2.072e-01 2.135e-01 0.971 0.332078
## FP021 -5.956e-02 2.632e-01 -0.226 0.821060
## FP022 2.336e-01 3.456e-01 0.676 0.499180
## FP023 -3.193e-01 1.909e-01 -1.672 0.094866 .
## FP024 -4.272e-01 2.827e-01 -1.511 0.131162
## FP025 4.376e-01 4.538e-01 0.964 0.335184
## FP026 2.068e-01 2.564e-01 0.806 0.420273
## FP027 2.424e-01 2.429e-01 0.998 0.318594
## FP028 1.070e-01 1.200e-01 0.892 0.372547
## FP029 -9.857e-02 2.199e-01 -0.448 0.654163
## FP030 -2.361e-01 2.468e-01 -0.957 0.339048
## FP031 8.690e-02 1.346e-01 0.646 0.518754
## FP032 -1.204e+00 7.772e-01 -1.550 0.121628
## FP033 5.766e-01 4.236e-01 1.361 0.173882
## FP034 -1.794e-01 2.618e-01 -0.685 0.493486
## FP035 -2.140e-01 1.704e-01 -1.256 0.209605
## FP036 7.701e-02 1.657e-01 0.465 0.642133
## FP037 1.098e-01 1.725e-01 0.636 0.524693
## FP038 2.721e-01 1.888e-01 1.441 0.150030
## FP039 2.011e-02 2.888e-01 0.070 0.944491
## FP040 5.477e-01 1.890e-01 2.898 0.003873 **
## FP041 -4.265e-01 3.004e-01 -1.420 0.156143
## FP042 -9.901e-01 7.078e-01 -1.399 0.162294
## FP043 -3.725e-02 2.096e-01 -0.178 0.859011
## FP044 -3.860e-01 2.184e-01 -1.768 0.077562 .
## FP045 2.120e-01 1.299e-01 1.631 0.103238
## FP046 -3.504e-02 2.733e-01 -0.128 0.898010
## FP047 -1.675e-02 1.414e-01 -0.118 0.905775
## FP048 2.610e-01 2.434e-01 1.073 0.283810
## FP049 1.241e-01 1.971e-01 0.630 0.529036
## FP050 9.087e-03 1.410e-01 0.064 0.948648
## FP051 1.050e-01 2.014e-01 0.521 0.602210
## FP052 -4.569e-01 2.482e-01 -1.841 0.066029 .
## FP053 2.994e-01 2.466e-01 1.214 0.225129
## FP054 2.734e-02 1.829e-01 0.149 0.881229
## FP055 -3.662e-01 1.970e-01 -1.858 0.063530 .
## FP056 -2.961e-01 2.979e-01 -0.994 0.320541
## FP057 -1.002e-01 1.379e-01 -0.727 0.467703
## FP058 3.100e-01 8.074e-01 0.384 0.701129
## FP059 -1.615e-01 1.690e-01 -0.956 0.339514
## FP060 2.350e-01 1.474e-01 1.595 0.111209
## FP061 -6.365e-01 1.440e-01 -4.421 1.13e-05 ***
## FP062 -5.224e-01 2.961e-01 -1.764 0.078078 .
## FP063 -2.001e+00 1.287e+00 -1.554 0.120553
## FP064 2.549e-01 1.221e-01 2.087 0.037207 *
## FP065 -2.844e-01 1.197e-01 -2.377 0.017714 *
## FP066 2.093e-01 1.264e-01 1.655 0.098301 .
## FP067 -1.406e-01 1.540e-01 -0.913 0.361631
## FP068 4.964e-01 2.028e-01 2.447 0.014630 *
## FP069 1.324e-01 8.824e-02 1.501 0.133885
## FP070 3.453e-03 8.088e-02 0.043 0.965963
## FP071 1.474e-01 1.237e-01 1.192 0.233775
## FP072 -9.773e-01 2.763e-01 -3.537 0.000431 ***
## FP073 -4.671e-01 2.072e-01 -2.254 0.024474 *
## FP074 1.793e-01 1.206e-01 1.487 0.137566
## FP075 1.231e-01 1.035e-01 1.188 0.235034
## FP076 5.166e-01 1.704e-01 3.031 0.002525 **
## FP077 1.644e-01 1.236e-01 1.331 0.183739
## FP078 -3.715e-01 1.588e-01 -2.339 0.019608 *
## FP079 4.254e-01 1.881e-01 2.262 0.023992 *
## FP080 3.101e-01 1.554e-01 1.996 0.046340 *
## FP081 -3.208e-01 1.117e-01 -2.873 0.004192 **
## FP082 1.243e-01 9.524e-02 1.305 0.192379
## FP083 -6.916e-01 2.134e-01 -3.241 0.001248 **
## FP084 3.626e-01 2.381e-01 1.523 0.128171
## FP085 -3.310e-01 1.428e-01 -2.317 0.020785 *
## FP086 1.169e-02 9.774e-02 0.120 0.904834
## FP087 4.559e-02 2.797e-01 0.163 0.870568
## FP088 2.416e-01 9.959e-02 2.425 0.015534 *
## FP089 5.999e-01 2.320e-01 2.586 0.009915 **
## FP090 -2.450e-02 1.154e-01 -0.212 0.831930
## FP091 -2.858e-01 3.185e-01 -0.897 0.369847
## FP092 2.665e-01 2.069e-01 1.288 0.198156
## FP093 1.974e-01 1.087e-01 1.816 0.069803 .
## FP094 -1.991e-01 1.441e-01 -1.381 0.167707
## FP095 -1.403e-01 1.124e-01 -1.248 0.212449
## FP096 -5.024e-01 1.459e-01 -3.445 0.000605 ***
## FP097 -2.635e-01 1.666e-01 -1.582 0.114020
## FP098 -2.865e-01 1.633e-01 -1.754 0.079863 .
## FP099 2.592e-01 2.568e-01 1.009 0.313136
## FP100 -4.008e-01 3.034e-01 -1.321 0.186949
## FP101 -1.760e-01 3.019e-01 -0.583 0.560147
## FP102 2.445e-01 3.449e-01 0.709 0.478579
## FP103 -1.493e-01 9.148e-02 -1.632 0.103176
## FP104 -1.428e-01 1.176e-01 -1.214 0.225238
## FP105 -6.912e-02 1.395e-01 -0.495 0.620482
## FP106 1.128e-01 1.288e-01 0.876 0.381495
## FP107 2.778e+00 8.247e-01 3.369 0.000796 ***
## FP108 8.836e-03 1.852e-01 0.048 0.961970
## FP109 8.200e-01 2.267e-01 3.617 0.000319 ***
## FP110 3.680e-01 3.311e-01 1.111 0.266811
## FP111 -5.565e-01 1.420e-01 -3.918 9.80e-05 ***
## FP112 -1.079e-01 2.705e-01 -0.399 0.690108
## FP113 1.511e-01 9.481e-02 1.594 0.111478
## FP114 -1.201e-01 1.891e-01 -0.635 0.525628
## FP115 -1.896e-01 1.405e-01 -1.349 0.177736
## FP116 7.778e-03 1.897e-01 0.041 0.967300
## FP117 2.583e-01 1.779e-01 1.452 0.147070
## FP118 -1.964e-01 1.230e-01 -1.596 0.110940
## FP119 7.515e-01 2.630e-01 2.857 0.004402 **
## FP120 -1.814e-01 1.794e-01 -1.011 0.312362
## FP121 -4.731e-02 3.957e-01 -0.120 0.904866
## FP122 1.048e-01 1.041e-01 1.007 0.314268
## FP123 3.926e-02 1.765e-01 0.222 0.824066
## FP124 1.235e-01 1.705e-01 0.724 0.469243
## FP125 -2.633e-04 1.151e-01 -0.002 0.998175
## FP126 -2.782e-01 1.177e-01 -2.363 0.018373 *
## FP127 -6.123e-01 1.739e-01 -3.521 0.000457 ***
## FP128 -5.424e-01 1.932e-01 -2.807 0.005136 **
## FP129 -6.731e-02 2.243e-01 -0.300 0.764167
## FP130 -1.034e+00 4.106e-01 -2.518 0.012009 *
## FP131 2.158e-01 1.617e-01 1.335 0.182405
## FP132 -1.976e-01 2.382e-01 -0.830 0.406998
## FP133 -1.573e-01 1.217e-01 -1.293 0.196319
## FP134 2.496e+00 1.196e+00 2.086 0.037310 *
## FP135 1.818e-01 1.319e-01 1.379 0.168460
## FP136 -7.763e-02 3.131e-01 -0.248 0.804237
## FP137 -4.613e-02 2.978e-01 -0.155 0.876947
## FP138 -9.392e-02 1.906e-01 -0.493 0.622251
## FP139 7.659e-02 4.063e-01 0.189 0.850517
## FP140 3.145e-01 2.149e-01 1.463 0.143784
## FP141 2.219e-01 2.765e-01 0.802 0.422532
## FP142 6.272e-01 1.488e-01 4.214 2.83e-05 ***
## FP143 9.981e-01 2.929e-01 3.407 0.000692 ***
## FP144 2.207e-01 2.839e-01 0.777 0.437195
## FP145 -1.146e-01 1.188e-01 -0.964 0.335169
## FP146 -2.324e-01 2.086e-01 -1.114 0.265716
## FP147 1.502e-01 1.228e-01 1.223 0.221703
## FP148 -1.600e-01 1.319e-01 -1.213 0.225560
## FP149 1.172e-01 1.650e-01 0.710 0.477770
## FP150 9.046e-02 1.577e-01 0.574 0.566368
## FP151 2.899e-01 3.120e-01 0.929 0.353202
## FP152 -2.544e-01 2.990e-01 -0.851 0.395087
## FP153 -3.765e-01 2.773e-01 -1.358 0.175029
## FP154 -1.027e+00 2.033e-01 -5.054 5.50e-07 ***
## FP155 4.888e-01 2.916e-01 1.676 0.094163 .
## FP156 -3.602e-02 3.636e-01 -0.099 0.921109
## FP157 -4.715e-01 2.468e-01 -1.910 0.056505 .
## FP158 1.669e-02 1.925e-01 0.087 0.930943
## FP159 1.800e-01 2.432e-01 0.740 0.459378
## FP160 1.525e-02 2.177e-01 0.070 0.944155
## FP161 -2.440e-01 1.433e-01 -1.703 0.089063 .
## FP162 4.910e-02 1.859e-01 0.264 0.791710
## FP163 4.785e-01 3.121e-01 1.533 0.125659
## FP164 5.096e-01 1.899e-01 2.684 0.007446 **
## FP165 5.793e-01 2.146e-01 2.700 0.007103 **
## FP166 -6.582e-02 2.185e-01 -0.301 0.763293
## FP167 -6.044e-01 2.515e-01 -2.403 0.016502 *
## FP168 -1.187e-01 1.872e-01 -0.634 0.526173
## FP169 -1.705e-01 8.312e-02 -2.051 0.040650 *
## FP170 -7.902e-02 1.560e-01 -0.506 0.612745
## FP171 4.651e-01 1.186e-01 3.922 9.64e-05 ***
## FP172 -4.426e-01 2.440e-01 -1.814 0.070120 .
## FP173 4.243e-01 1.657e-01 2.561 0.010634 *
## FP174 -1.010e-01 2.098e-01 -0.481 0.630311
## FP175 -4.657e-02 2.481e-01 -0.188 0.851136
## FP176 9.736e-01 2.644e-01 3.682 0.000249 ***
## FP177 1.386e-01 2.393e-01 0.579 0.562538
## FP178 6.497e-02 2.079e-01 0.313 0.754691
## FP179 -3.415e-02 2.232e-01 -0.153 0.878437
## FP180 -7.905e-01 5.523e-01 -1.431 0.152839
## FP181 4.925e-01 3.218e-01 1.531 0.126309
## FP182 -1.124e-01 1.310e-01 -0.858 0.391384
## FP183 2.998e-01 7.143e-01 0.420 0.674836
## FP184 4.876e-01 1.580e-01 3.087 0.002103 **
## FP185 -3.778e-01 2.037e-01 -1.854 0.064108 .
## FP186 -3.654e-01 1.953e-01 -1.871 0.061710 .
## FP187 4.457e-01 2.682e-01 1.662 0.097015 .
## FP188 1.475e-01 1.258e-01 1.172 0.241519
## FP189 -1.984e-02 3.468e-01 -0.057 0.954384
## FP190 2.629e-01 3.018e-01 0.871 0.383981
## FP191 2.799e-01 1.465e-01 1.911 0.056388 .
## FP192 -2.404e-01 2.751e-01 -0.874 0.382534
## FP193 1.502e-01 1.494e-01 1.005 0.315159
## FP194 8.029e-01 6.379e-01 1.259 0.208566
## FP195 5.967e-02 3.435e-01 0.174 0.862158
## FP196 1.091e-02 2.544e-01 0.043 0.965812
## FP197 -3.736e-02 1.569e-01 -0.238 0.811793
## FP198 1.896e-01 2.665e-01 0.712 0.476893
## FP199 -9.932e-02 1.797e-01 -0.553 0.580702
## FP200 -6.421e-02 2.161e-01 -0.297 0.766462
## FP201 -4.838e-01 1.980e-01 -2.444 0.014771 *
## FP202 5.664e-01 1.869e-01 3.031 0.002527 **
## FP203 2.586e-01 6.447e-01 0.401 0.688462
## FP204 -1.371e-01 2.543e-01 -0.539 0.590008
## FP205 7.177e-02 1.561e-01 0.460 0.645857
## FP206 -6.769e-02 1.860e-01 -0.364 0.716094
## FP207 -5.538e-03 2.060e-01 -0.027 0.978560
## FP208 -5.338e-01 6.324e-01 -0.844 0.398925
## MolWeight -1.232e+00 2.296e-01 -5.365 1.09e-07 ***
## NumAtoms -1.478e+01 3.473e+00 -4.257 2.35e-05 ***
## NumNonHAtoms 1.795e+01 3.166e+00 5.670 2.07e-08 ***
## NumBonds 9.843e+00 2.681e+00 3.671 0.000260 ***
## NumNonHBonds -1.030e+01 1.793e+00 -5.746 1.35e-08 ***
## NumMultBonds 2.107e-01 1.754e-01 1.201 0.229990
## NumRotBonds -5.213e-01 1.334e-01 -3.908 0.000102 ***
## NumDblBonds -7.492e-01 3.163e-01 -2.369 0.018111 *
## NumAromaticBonds -2.364e+00 6.232e-01 -3.794 0.000161 ***
## NumHydrogen 8.347e-01 1.880e-01 4.439 1.04e-05 ***
## NumCarbon 1.730e-02 3.763e-01 0.046 0.963335
## NumNitrogen 6.125e+00 3.045e+00 2.011 0.044645 *
## NumOxygen 2.389e+00 4.523e-01 5.283 1.69e-07 ***
## NumSulfer -8.508e+00 3.619e+00 -2.351 0.018994 *
## NumChlorine -7.449e+00 1.989e+00 -3.744 0.000195 ***
## NumHalogen 1.408e+00 2.109e+00 0.668 0.504615
## NumRings 1.276e+00 6.716e-01 1.901 0.057731 .
## HydrophilicFactor 1.099e-02 1.137e-01 0.097 0.922998
## SurfaceArea1 8.825e-02 6.058e-02 1.457 0.145643
## SurfaceArea2 9.555e-02 5.615e-02 1.702 0.089208 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5524 on 722 degrees of freedom
## Multiple R-squared: 0.9446, Adjusted R-squared: 0.9271
## F-statistic: 54.03 on 228 and 722 DF, p-value: < 2.2e-16
### Start the data frame that collects test-set predictions: observed
### solubility plus the predictions of the full-predictor linear model.
testResults <- data.frame(
  obs = solTestY,
  Linear_Regression = predict(lmTune0, newdata = solTestXtrans)
)
### And another linear model using a set of predictors reduced by
### unsupervised filtering. We apply a filter (cutoff 0.9) to reduce
### extreme between-predictor correlations. Note the lack of warnings.
tooHigh <- findCorrelation(cor(solTrainXtrans), cutoff = 0.9)
# Select the columns to keep by position instead of using `-tooHigh`:
# if the filter flags nothing, negative indexing with an empty vector
# would select zero columns rather than all of them.
keepCols <- setdiff(seq_len(ncol(solTrainXtrans)), tooHigh)
trainXfiltered <- solTrainXtrans[, keepCols, drop = FALSE]
testXfiltered <- solTestXtrans[, keepCols, drop = FALSE]
set.seed(100)
lmTune <- train(
  x = trainXfiltered,
  y = solTrainY,
  method = "lm",
  trControl = ctrl
)
# Print the cross-validated performance of the filtered model
lmTune
## Linear Regression
##
## 951 samples
## 190 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.7112743 0.8805826 0.53382
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
### Save the test set results of the filtered linear model in a data
### frame. The model was trained on trainXfiltered, so it is scored on
### the matching filtered test predictors.
testResults1 <- data.frame(
  obs = solTestY,
  Linear_Regression = predict(lmTune, testXfiltered)
)
# Run PLS and PCR on the solubility data and compare the results
# library(pls)
# Partial least squares, tuned over 1 to 50 components on the shared
# cross-validation folds.
set.seed(100)
plsTune <- train(
  x = solTrainXtrans,
  y = solTrainY,
  method = "pls",
  trControl = ctrl,
  tuneGrid = expand.grid(ncomp = seq_len(50))
)
plsTune
## Partial Least Squares
##
## 951 samples
## 228 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ...
## Resampling results across tuning parameters:
##
## ncomp RMSE Rsquared MAE
## 1 1.7567200 0.2656794 1.4372895
## 2 1.2744189 0.6113687 0.9891602
## 3 1.0369849 0.7435114 0.8042904
## 4 0.8333567 0.8331804 0.6371070
## 5 0.7465669 0.8658913 0.5714154
## 6 0.7126571 0.8780661 0.5479709
## 7 0.6964672 0.8845762 0.5358792
## 8 0.6902700 0.8867464 0.5304327
## 9 0.6866134 0.8878356 0.5276338
## 10 0.6859736 0.8882368 0.5248576
## 11 0.6846168 0.8886602 0.5267211
## 12 0.6836218 0.8891027 0.5190374
## 13 0.6838654 0.8892094 0.5210315
## 14 0.6807350 0.8901425 0.5173906
## 15 0.6826661 0.8896885 0.5183259
## 16 0.6819079 0.8899369 0.5166051
## 17 0.6803845 0.8903528 0.5153813
## 18 0.6835355 0.8894978 0.5191376
## 19 0.6874578 0.8884091 0.5213042
## 20 0.6876702 0.8881465 0.5210240
## 21 0.6906650 0.8871996 0.5211997
## 22 0.6913664 0.8870307 0.5226134
## 23 0.6955030 0.8857345 0.5260189
## 24 0.7004351 0.8842887 0.5303128
## 25 0.7036220 0.8832878 0.5305845
## 26 0.7070175 0.8820762 0.5334096
## 27 0.7099186 0.8810620 0.5354840
## 28 0.7119219 0.8803695 0.5340200
## 29 0.7148858 0.8794524 0.5357332
## 30 0.7179197 0.8784303 0.5372692
## 31 0.7205870 0.8775123 0.5393194
## 32 0.7226853 0.8768418 0.5407595
## 33 0.7228866 0.8767838 0.5398822
## 34 0.7248003 0.8762652 0.5409011
## 35 0.7252532 0.8760801 0.5399395
## 36 0.7248396 0.8762991 0.5394633
## 37 0.7261218 0.8758859 0.5395361
## 38 0.7264810 0.8757825 0.5391117
## 39 0.7275604 0.8754313 0.5392966
## 40 0.7290232 0.8749157 0.5402907
## 41 0.7299098 0.8746315 0.5411541
## 42 0.7303183 0.8744295 0.5409782
## 43 0.7300602 0.8744625 0.5405539
## 44 0.7305048 0.8742995 0.5409475
## 45 0.7305013 0.8742489 0.5413010
## 46 0.7305273 0.8742750 0.5417646
## 47 0.7316553 0.8738556 0.5416701
## 48 0.7318788 0.8737642 0.5424856
## 49 0.7328515 0.8734481 0.5423624
## 50 0.7333185 0.8731901 0.5434880
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was ncomp = 17.
# Plot the resampling profile of the PLS fit
plot(plsTune)
# Prediction for the test data with the selected PLS model
testResults[["PLS"]] <- predict(plsTune, newdata = solTestXtrans)
# Principal component regression, tuned over the same 1 to 50
# component grid and the same folds as PLS.
set.seed(100)
pcrTune <- train(
  x = solTrainXtrans,
  y = solTrainY,
  method = "pcr",
  trControl = ctrl,
  tuneGrid = expand.grid(ncomp = seq_len(50))
)
pcrTune
## Principal Component Analysis
##
## 951 samples
## 228 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ...
## Resampling results across tuning parameters:
##
## ncomp RMSE Rsquared MAE
## 1 1.9823303 0.07290019 1.5869212
## 2 1.6398355 0.36081511 1.2810055
## 3 1.3709076 0.55235719 1.0618538
## 4 1.3758521 0.54925157 1.0643075
## 5 1.3450879 0.56844597 1.0301765
## 6 1.2140930 0.64989532 0.9247502
## 7 1.1797702 0.67070995 0.8998390
## 8 1.1408942 0.69159641 0.8737832
## 9 1.0447017 0.74097508 0.8198902
## 10 0.9846736 0.76688176 0.7629658
## 11 0.9682403 0.77484715 0.7481903
## 12 0.9649781 0.77613899 0.7452582
## 13 0.9511715 0.78261820 0.7380502
## 14 0.9382676 0.78854541 0.7304692
## 15 0.9391399 0.78813473 0.7305274
## 16 0.8683560 0.81866216 0.6639231
## 17 0.8697473 0.81806492 0.6647760
## 18 0.8710394 0.81743620 0.6652573
## 19 0.8706819 0.81742338 0.6661038
## 20 0.8615899 0.82129313 0.6585646
## 21 0.8104091 0.84240747 0.6215034
## 22 0.8094659 0.84279178 0.6219602
## 23 0.8051823 0.84437865 0.6193311
## 24 0.8067893 0.84391465 0.6198146
## 25 0.8087502 0.84310259 0.6187339
## 26 0.8038728 0.84500751 0.6168930
## 27 0.8027071 0.84564671 0.6161959
## 28 0.8017716 0.84596907 0.6161530
## 29 0.7851503 0.85225581 0.6036835
## 30 0.7837446 0.85266381 0.6011218
## 31 0.7809536 0.85370837 0.5987260
## 32 0.7698948 0.85771024 0.5920064
## 33 0.7419912 0.86777296 0.5723061
## 34 0.7362850 0.86997471 0.5686044
## 35 0.7343025 0.87073561 0.5656401
## 36 0.7302412 0.87227541 0.5620624
## 37 0.7252496 0.87428725 0.5584212
## 38 0.7253938 0.87429060 0.5573722
## 39 0.7247813 0.87453779 0.5569706
## 40 0.7257744 0.87419673 0.5573496
## 41 0.7240053 0.87464218 0.5564440
## 42 0.7250126 0.87432020 0.5568892
## 43 0.7236723 0.87484057 0.5567977
## 44 0.7259451 0.87402482 0.5581918
## 45 0.7229274 0.87512646 0.5544893
## 46 0.7222965 0.87523134 0.5530671
## 47 0.7227040 0.87512338 0.5539032
## 48 0.7217612 0.87536963 0.5530784
## 49 0.7213154 0.87550719 0.5539427
## 50 0.7223208 0.87520400 0.5543803
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was ncomp = 49.
# Plot the resampling profile of the PCR fit
plot(pcrTune)
# Stack the resampling summaries of both component-based models, each
# row tagged with the model it came from, so that the two RMSE
# profiles can be drawn on one set of axes.
plsResamples <- plsTune$results
plsResamples[["Model"]] <- "PLS"
pcrResamples <- pcrTune$results
pcrResamples[["Model"]] <- "PCR"
plsPlotData <- rbind(plsResamples, pcrResamples)
xyplot(
  RMSE ~ ncomp,
  groups = Model,
  data = plsPlotData,
  type = c("o", "g"),
  auto.key = list(columns = 2),
  # aspect = 1,
  xlab = "# Components",
  ylab = "RMSE (Cross-Validation)"
)
# Predictor importance plot for the PLS model (top 25, unscaled)
plsImp <- varImp(plsTune, scale = FALSE)
plot(plsImp, top = 25, scales = list(y = list(cex = 0.95)))
# Predictor importance plot for the PCR model (top 25, unscaled)
pcrImp <- varImp(pcrTune, scale = FALSE)
plot(pcrImp, top = 25, scales = list(y = list(cex = 0.95)))
# The text used the elasticnet to obtain a ridge regression model.
## There is now a simple ridge regression method.
# You may need to try different ranges of values for lambda.
# Argument names are spelled out in full (length.out, preProcess)
# rather than relying on partial argument matching.
ridgeGrid <- expand.grid(lambda = seq(0, 0.1, length.out = 10))
### Start the clock to track time!
## The following code took 94.49 seconds in the recorded run.
## Your running time may be different depending
## on your CPU.
set.seed(100) # it may take a while to get results
ptm <- proc.time()
# library(elasticnet)
ridgeTune <- train(
  x = solTrainXtrans,
  y = solTrainY,
  method = "ridge",
  tuneGrid = ridgeGrid,
  trControl = ctrl,
  preProcess = c("center", "scale")
)
# Time spent since the clock was started
proc.time() - ptm
## user system elapsed
## 94.00 20.07 129.33
# Print the cross-validated performance for each candidate lambda
ridgeTune
## Ridge Regression
##
## 951 samples
## 228 predictors
##
## Pre-processing: centered (228), scaled (228)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.00000000 0.7169416 0.8793300 0.5298092
## 0.01111111 0.6913735 0.8869082 0.5232301
## 0.02222222 0.6860086 0.8887434 0.5217135
## 0.03333333 0.6861042 0.8889600 0.5236471
## 0.04444444 0.6884148 0.8885888 0.5269698
## 0.05555556 0.6919052 0.8879560 0.5305611
## 0.06666667 0.6961413 0.8871974 0.5343997
## 0.07777778 0.7009117 0.8863787 0.5385735
## 0.08888889 0.7061028 0.8855344 0.5430394
## 0.10000000 0.7116491 0.8846838 0.5480085
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was lambda = 0.02222222.
# Check the names of the components stored in ridgeTune
names(ridgeTune)
## [1] "method" "modelInfo" "modelType" "results" "pred"
## [6] "bestTune" "call" "dots" "metric" "control"
## [11] "finalModel" "preProcess" "trainingData" "ptype" "resample"
## [16] "resampledCM" "perfNames" "maximize" "yLimits" "times"
## [21] "levels"
# Summarise the ridge fit.
# NOTE(review): the printed listing gives length/class/mode of what look
# like the components of the underlying fitted model object, not a
# coefficient table -- confirm this is the intended output.
summary(ridgeTune)
## Length Class Mode
## call 4 -none- call
## actions 261 -none- list
## allset 228 -none- numeric
## beta.pure 59508 -none- numeric
## vn 228 -none- character
## mu 1 -none- numeric
## normx 228 -none- numeric
## meanx 228 -none- numeric
## lambda 1 -none- numeric
## L1norm 261 -none- numeric
## penalty 261 -none- numeric
## df 261 -none- numeric
## Cp 261 -none- numeric
## sigma2 1 -none- numeric
## xNames 228 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 1 -none- logical
## param 0 -none- list
# Prediction for the test data with the selected ridge model
testResults$Ridge <- predict(ridgeTune, solTestXtrans)
### Elastic net: tune the ridge penalty (lambda) and the fraction
### parameter jointly. Start the clock to time the fit.
ptm <- proc.time()
# Argument names are spelled out in full (length.out, preProcess)
# rather than relying on partial argument matching.
enetGrid <- expand.grid(
  lambda = c(0, 0.01, 0.1),
  fraction = seq(0.05, 1, length.out = 20)
)
set.seed(100)
enetTune <- train(
  x = solTrainXtrans,
  y = solTrainY,
  method = "enet",
  tuneGrid = enetGrid,
  trControl = ctrl,
  preProcess = c("center", "scale")
)
# Time spent since the clock was started
proc.time() - ptm
## user system elapsed
## 31.66 6.41 50.28
### Stop the clock
#> proc.time() - ptm
# user system elapsed
# 34.31 0.48 34.89
# Print the cross-validated performance for every (lambda, fraction) pair
enetTune
## Elasticnet
##
## 951 samples
## 228 predictors
##
## Pre-processing: centered (228), scaled (228)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ...
## Resampling results across tuning parameters:
##
## lambda fraction RMSE Rsquared MAE
## 0.00 0.05 0.8682135 0.8364159 0.6582205
## 0.00 0.10 0.6917546 0.8861422 0.5272791
## 0.00 0.15 0.6784548 0.8902877 0.5179525
## 0.00 0.20 0.6774425 0.8907722 0.5158738
## 0.00 0.25 0.6860148 0.8881153 0.5177716
## 0.00 0.30 0.6910045 0.8865193 0.5205346
## 0.00 0.35 0.6966432 0.8848125 0.5246741
## 0.00 0.40 0.7023375 0.8830881 0.5278990
## 0.00 0.45 0.7052740 0.8822021 0.5285138
## 0.00 0.50 0.7059831 0.8820328 0.5284400
## 0.00 0.55 0.7065024 0.8819233 0.5281518
## 0.00 0.60 0.7065114 0.8820017 0.5275396
## 0.00 0.65 0.7074118 0.8818045 0.5272275
## 0.00 0.70 0.7082961 0.8815922 0.5270556
## 0.00 0.75 0.7091942 0.8813839 0.5269874
## 0.00 0.80 0.7102416 0.8811257 0.5270035
## 0.00 0.85 0.7114872 0.8808167 0.5274405
## 0.00 0.90 0.7130500 0.8804047 0.5281161
## 0.00 0.95 0.7149097 0.8798966 0.5288774
## 0.00 1.00 0.7169416 0.8793300 0.5298092
## 0.01 0.05 1.5159564 0.6476354 1.1634495
## 0.01 0.10 1.1319979 0.7709849 0.8660515
## 0.01 0.15 0.9064708 0.8256600 0.6871343
## 0.01 0.20 0.7865777 0.8580364 0.6008124
## 0.01 0.25 0.7294129 0.8746693 0.5563058
## 0.01 0.30 0.7005759 0.8833681 0.5351833
## 0.01 0.35 0.6893917 0.8866891 0.5271555
## 0.01 0.40 0.6841276 0.8883835 0.5239676
## 0.01 0.45 0.6808591 0.8894713 0.5217575
## 0.01 0.50 0.6788489 0.8901183 0.5200594
## 0.01 0.55 0.6765076 0.8909486 0.5182244
## 0.01 0.60 0.6760440 0.8911677 0.5171115
## 0.01 0.65 0.6773673 0.8908049 0.5173328
## 0.01 0.70 0.6789357 0.8903792 0.5176779
## 0.01 0.75 0.6806104 0.8899249 0.5181852
## 0.01 0.80 0.6819248 0.8895841 0.5183361
## 0.01 0.85 0.6836298 0.8891156 0.5190363
## 0.01 0.90 0.6861949 0.8883789 0.5203764
## 0.01 0.95 0.6892478 0.8874946 0.5219250
## 0.01 1.00 0.6925607 0.8865235 0.5237541
## 0.10 0.05 1.6865021 0.5163010 1.2948592
## 0.10 0.10 1.4048400 0.6996155 1.0753819
## 0.10 0.15 1.1687872 0.7629706 0.8924411
## 0.10 0.20 1.0067836 0.7907592 0.7641578
## 0.10 0.25 0.8947633 0.8229558 0.6770513
## 0.10 0.30 0.8201908 0.8439511 0.6246190
## 0.10 0.35 0.7759957 0.8570273 0.5971968
## 0.10 0.40 0.7515618 0.8651763 0.5779286
## 0.10 0.45 0.7330964 0.8719902 0.5628857
## 0.10 0.50 0.7239567 0.8758243 0.5577499
## 0.10 0.55 0.7191286 0.8780581 0.5554838
## 0.10 0.60 0.7168852 0.8794061 0.5549866
## 0.10 0.65 0.7147614 0.8807592 0.5536802
## 0.10 0.70 0.7142090 0.8815578 0.5529367
## 0.10 0.75 0.7136933 0.8822009 0.5517154
## 0.10 0.80 0.7129726 0.8828558 0.5505901
## 0.10 0.85 0.7125100 0.8833964 0.5497964
## 0.10 0.90 0.7121827 0.8838695 0.5491697
## 0.10 0.95 0.7119464 0.8842824 0.5484685
## 0.10 1.00 0.7116491 0.8846838 0.5480085
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were fraction = 0.6 and lambda = 0.01.
# Check the names of the components stored in enetTune
names(enetTune)
## [1] "method" "modelInfo" "modelType" "results" "pred"
## [6] "bestTune" "call" "dots" "metric" "control"
## [11] "finalModel" "preProcess" "trainingData" "ptype" "resample"
## [16] "resampledCM" "perfNames" "maximize" "yLimits" "times"
## [21] "levels"
# Prediction for the test data with the selected elastic net model
testResults$ENET <- predict(enetTune, solTestXtrans)
# Which model has the best predictive ability?
### Score every tuned model on the held-out test set. The models are
### kept in one named list so that prediction and the three metrics
### (R2, RMSE, MAE) are each written once instead of once per model.
testModels <- list(
  LRM = lmTune0,
  PCR = pcrTune,
  PLS = plsTune,
  Ridge = ridgeTune,
  ENET = enetTune
)
# Store (or refresh) one prediction column per model in testResults
for (modelName in names(testModels)) {
  testResults[[modelName]] <- predict(testModels[[modelName]], solTestXtrans)
}
# One column per model, one row per metric
testMetrics <- vapply(
  names(testModels),
  function(modelName) {
    predicted <- testResults[[modelName]]
    err <- predicted - solTestY
    c(
      R2 = cor(predicted, solTestY)^2,
      RMSE = sqrt(mean(err^2)),
      MAE = mean(abs(err))
    )
  },
  c(R2 = 0, RMSE = 0, MAE = 0)
)
# Models in rows, metrics in columns
results <- t(testMetrics)
# Per-metric vectors, kept so that code relying on them still works.
# Note that these names shadow functions of the same name exported by
# the loaded packages.
R2 <- unname(results[, "R2"])
RMSE <- unname(results[, "RMSE"])
MAE <- unname(results[, "MAE"])
results
## R2 RMSE MAE
## LRM 0.8722236 0.7455802 0.5497605
## PCR 0.8551094 0.7920149 0.6030362
## PLS 0.8805903 0.7192161 0.5401932
## Ridge 0.8801309 0.7215616 0.5369285
## ENET 0.8841315 0.7072226 0.5300267
You may conclude that ENET performs best on the test set: it has the highest R2 and the lowest RMSE and MAE.