#Access the required library 

#Required packages
library(AppliedPredictiveModeling)
library(lattice)
library(caret)
## Loading required package: ggplot2
library(corrplot)
## corrplot 0.95 loaded
library(e1071)
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
## 
##     element
library(pls)
## 
## Attaching package: 'pls'
## The following object is masked from 'package:corrplot':
## 
##     corrplot
## The following object is masked from 'package:caret':
## 
##     R2
## The following object is masked from 'package:stats':
## 
##     loadings
library(elasticnet)
## Loading required package: lars
## Loaded lars 1.3

# Load the solubility data sets

data("solubility")
# Help page (local help-server link; the port is session specific):
# http://127.0.0.1:27533/library/AppliedPredictiveModeling/html/solubility.html

# Training data: structure of the response and dimensions of the predictors
str(solTrainY)
##  num [1:951] -3.97 -3.98 -3.99 -4 -4.06 -4.08 -4.08 -4.1 -4.1 -4.11 ...
dim(solTrainX)
## [1] 951 228
# Test data: first few responses and first few predictor rows
head(solTestY)
## [1] 0.93 0.85 0.81 0.74 0.61 0.58
head(solTestX)
##    FP001 FP002 FP003 FP004 FP005 FP006 FP007 FP008 FP009 FP010 FP011 FP012
## 20     1     0     0     1     0     0     0     0     1     1     0     0
## 21     1     0     1     1     0     1     0     0     0     0     1     1
## 23     0     1     0     0     1     0     0     0     0     1     0     0
## 25     0     0     1     0     0     1     0     0     0     0     0     0
## 28     1     1     0     1     1     1     0     1     0     0     1     0
## 31     1     0     0     1     0     0     0     0     0     0     0     1
##    FP013 FP014 FP015 FP016 FP017 FP018 FP019 FP020 FP021 FP022 FP023 FP024
## 20     0     0     1     0     0     0     0     0     1     0     0     0
## 21     0     0     1     1     0     0     0     0     0     0     0     0
## 23     0     0     0     0     0     0     0     0     0     0     0     0
## 25     0     0     1     0     0     0     0     0     0     0     0     0
## 28     0     0     1     0     0     0     1     0     0     0     0     1
## 31     0     0     1     0     0     0     0     1     0     0     0     0
##    FP025 FP026 FP027 FP028 FP029 FP030 FP031 FP032 FP033 FP034 FP035 FP036
## 20     1     0     0     0     0     0     0     0     0     0     0     0
## 21     0     0     0     0     0     0     0     0     0     0     0     0
## 23     0     0     0     0     0     0     0     1     1     0     0     0
## 25     0     0     1     1     0     1     0     0     0     0     0     0
## 28     0     0     0     0     0     0     0     0     0     0     0     0
## 31     0     0     0     0     0     0     0     0     0     0     0     0
##    FP037 FP038 FP039 FP040 FP041 FP042 FP043 FP044 FP045 FP046 FP047 FP048
## 20     0     1     0     0     0     0     0     0     0     0     0     0
## 21     0     0     0     0     0     0     0     0     0     0     0     0
## 23     0     0     0     0     0     0     0     0     0     1     0     0
## 25     0     0     0     0     0     0     0     0     0     0     0     0
## 28     0     0     0     1     0     0     0     0     0     0     1     0
## 31     0     0     0     0     0     0     0     0     0     0     0     0
##    FP049 FP050 FP051 FP052 FP053 FP054 FP055 FP056 FP057 FP058 FP059 FP060
## 20     0     0     0     0     0     0     0     0     0     0     0     1
## 21     0     0     0     0     0     0     0     0     0     0     0     1
## 23     0     0     0     0     0     0     1     0     0     0     0     1
## 25     0     0     0     0     0     0     0     0     0     0     0     0
## 28     0     0     0     0     0     0     0     0     0     0     0     0
## 31     0     0     0     0     0     0     0     0     0     0     0     1
##    FP061 FP062 FP063 FP064 FP065 FP066 FP067 FP068 FP069 FP070 FP071 FP072
## 20     1     1     0     1     0     0     0     0     0     0     0     1
## 21     1     1     1     1     0     1     1     1     0     0     0     1
## 23     1     0     0     0     1     0     0     0     0     0     0     1
## 25     0     0     1     0     0     1     1     1     0     0     0     0
## 28     0     1     1     0     0     0     1     1     0     0     0     1
## 31     1     1     0     0     0     1     0     0     0     0     0     1
##    FP073 FP074 FP075 FP076 FP077 FP078 FP079 FP080 FP081 FP082 FP083 FP084
## 20     1     0     0     0     0     0     0     1     0     0     0     0
## 21     0     0     1     0     0     0     0     1     0     0     1     0
## 23     1     1     0     0     0     1     1     0     0     1     0     0
## 25     0     0     1     0     1     0     1     1     1     0     1     1
## 28     0     0     0     0     0     0     1     0     0     1     1     1
## 31     0     0     0     0     0     0     0     0     0     0     0     0
##    FP085 FP086 FP087 FP088 FP089 FP090 FP091 FP092 FP093 FP094 FP095 FP096
## 20     0     0     0     1     0     0     0     0     0     0     0     0
## 21     0     0     0     0     0     0     0     0     0     1     0     0
## 23     0     0     1     0     0     0     0     0     0     0     1     0
## 25     0     1     1     0     0     1     1     0     1     0     1     0
## 28     0     0     1     0     0     0     1     0     0     0     0     0
## 31     0     0     0     0     0     0     0     0     0     0     0     0
##    FP097 FP098 FP099 FP100 FP101 FP102 FP103 FP104 FP105 FP106 FP107 FP108
## 20     0     1     0     0     0     0     0     0     0     0     1     0
## 21     0     1     0     1     0     0     0     0     0     0     0     1
## 23     0     0     0     0     0     0     0     0     0     0     0     0
## 25     0     0     0     0     1     0     1     1     0     0     0     0
## 28     0     0     0     1     1     0     0     0     0     0     0     0
## 31     0     1     0     0     0     0     0     0     0     0     0     1
##    FP109 FP110 FP111 FP112 FP113 FP114 FP115 FP116 FP117 FP118 FP119 FP120
## 20     0     0     0     0     0     0     0     0     0     1     0     0
## 21     0     1     1     0     1     0     0     0     0     0     0     1
## 23     0     0     0     0     0     0     0     0     0     0     1     0
## 25     0     0     0     0     0     0     0     1     0     1     0     0
## 28     0     1     0     0     0     0     0     1     0     0     0     0
## 31     0     0     0     0     0     0     0     0     0     0     0     0
##    FP121 FP122 FP123 FP124 FP125 FP126 FP127 FP128 FP129 FP130 FP131 FP132
## 20     0     0     0     0     0     0     0     0     0     0     0     0
## 21     0     0     0     0     0     0     0     0     0     0     0     1
## 23     0     0     0     0     1     0     0     0     0     0     0     0
## 25     0     1     0     0     0     0     0     0     0     0     0     0
## 28     0     0     0     0     0     1     0     1     0     0     1     1
## 31     0     0     0     1     0     0     0     0     0     0     0     0
##    FP133 FP134 FP135 FP136 FP137 FP138 FP139 FP140 FP141 FP142 FP143 FP144
## 20     0     0     0     0     0     0     0     0     0     0     0     0
## 21     0     0     0     0     0     0     0     0     0     0     0     0
## 23     0     0     0     0     0     0     0     0     0     0     0     0
## 25     0     0     1     0     0     1     0     0     0     1     0     1
## 28     0     0     0     1     0     0     0     0     0     0     0     0
## 31     0     0     0     0     0     0     0     0     0     0     0     0
##    FP145 FP146 FP147 FP148 FP149 FP150 FP151 FP152 FP153 FP154 FP155 FP156
## 20     0     0     0     0     0     0     0     0     0     0     0     0
## 21     0     0     0     0     0     0     0     0     0     0     0     0
## 23     0     0     0     1     0     0     0     0     0     0     0     0
## 25     0     0     0     0     0     1     0     0     0     0     0     0
## 28     0     0     0     0     0     0     0     0     0     0     0     0
## 31     0     0     0     0     0     0     0     0     0     0     0     0
##    FP157 FP158 FP159 FP160 FP161 FP162 FP163 FP164 FP165 FP166 FP167 FP168
## 20     0     0     0     0     0     0     1     0     1     0     1     0
## 21     0     0     0     0     0     0     1     0     1     0     1     0
## 23     0     0     0     0     0     1     0     1     0     1     0     1
## 25     0     0     0     0     1     0     0     0     0     0     0     0
## 28     0     0     0     0     0     1     1     1     1     1     1     1
## 31     0     0     0     0     0     0     1     0     0     0     0     0
##    FP169 FP170 FP171 FP172 FP173 FP174 FP175 FP176 FP177 FP178 FP179 FP180
## 20     0     0     0     0     0     0     0     0     0     0     0     1
## 21     0     0     0     0     0     0     0     0     0     0     0     0
## 23     1     0     0     0     0     0     0     0     0     0     0     0
## 25     0     1     0     0     1     0     0     0     0     0     0     0
## 28     0     0     0     0     0     0     0     1     0     0     0     0
## 31     0     0     0     0     0     0     0     0     0     0     0     0
##    FP181 FP182 FP183 FP184 FP185 FP186 FP187 FP188 FP189 FP190 FP191 FP192
## 20     0     0     0     0     0     0     0     0     0     0     0     0
## 21     0     0     0     0     0     0     0     0     0     0     0     0
## 23     0     0     1     0     0     0     0     0     0     0     0     0
## 25     0     0     0     0     0     0     0     0     0     0     0     0
## 28     0     0     0     0     0     0     0     0     0     0     0     0
## 31     0     0     0     0     0     1     0     0     0     0     1     0
##    FP193 FP194 FP195 FP196 FP197 FP198 FP199 FP200 FP201 FP202 FP203 FP204
## 20     0     0     0     0     0     0     0     0     0     0     0     0
## 21     0     0     0     0     0     0     1     0     0     0     0     0
## 23     0     0     0     0     0     0     0     0     0     0     0     0
## 25     0     0     1     0     0     0     0     0     0     0     0     0
## 28     0     0     0     0     0     0     0     0     0     0     0     0
## 31     0     0     0     0     0     0     0     0     0     0     0     0
##    FP205 FP206 FP207 FP208 MolWeight NumAtoms NumNonHAtoms NumBonds
## 20     0     0     0     0     94.50        8            5        7
## 21     0     0     0     0     89.11       13            6       12
## 23     1     0     0     0    110.12       14            8       14
## 25     0     0     0     0    100.19       19            7       19
## 28     0     0     0     0    122.14       15            9       15
## 31     0     0     0     0     60.06        8            4        7
##    NumNonHBonds NumMultBonds NumRotBonds NumDblBonds NumAromaticBonds
## 20            4            1           0           1                0
## 21            5            1           2           1                0
## 23            8            6           0           0                6
## 25            7            0           0           0                0
## 28            9            7           1           1                6
## 31            3            1           1           1                0
##    NumHydrogen NumCarbon NumNitrogen NumOxygen NumSulfer NumChlorine NumHalogen
## 20           3         2           0         2         0           1          1
## 21           7         3           1         2         0           0          0
## 23           6         6           0         2         0           0          0
## 25          12         5           2         0         0           0          0
## 28           6         6           2         1         0           0          0
## 31           4         2           0         2         0           0          0
##    NumRings HydrophilicFactor SurfaceArea1 SurfaceArea2
## 20        0             0.492        37.30        37.30
## 21        0             1.317        52.32        52.32
## 23        1             0.846        40.46        40.46
## 25        1             0.984        24.06        24.06
## 28        1             0.843        55.98        55.98
## 31        0            -0.431        26.30        26.30

# Stage 1: Data pre-processing

### Exploratory plots of the training data
# Base-graphics scatter plot of solubility against molecular weight
plot(
  solTrainY ~ solTrainX$MolWeight,
  main = "(a)",
  xlab = "Molecular Weight",
  ylab = "Solubility (log)",
  col = "blue"
)

# Simple linear regression of solubility on molecular weight; `fit` is kept
# so the fitted line can be added to the scatter plot afterwards
fit <- lm(solTrainY ~ solTrainX$MolWeight)
summary(fit)
## 
## Call:
## lm(formula = solTrainY ~ solTrainX$MolWeight)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.5310 -0.8068  0.2151  0.9793  7.2695 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.0664486  0.1182195  -0.562    0.574    
## solTrainX$MolWeight -0.0131519  0.0005274 -24.936   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.592 on 949 degrees of freedom
## Multiple R-squared:  0.3958, Adjusted R-squared:  0.3952 
## F-statistic: 621.8 on 1 and 949 DF,  p-value: < 2.2e-16
# Add the fitted regression line from `fit` to the current plot
abline(fit, col = 2, lwd = 2)

### Correlation test for the relationship between solubility and
### molecular weight (cor.test() defaults to Pearson's correlation)
cor.test(x = solTrainY, y = solTrainX$MolWeight)
## 
##  Pearson's product-moment correlation
## 
## data:  solTrainY and solTrainX$MolWeight
## t = -24.936, df = 949, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.6660933 -0.5891573
## sample estimates:
##        cor 
## -0.6291639
# Lattice scatter plot of solubility against the number of rotatable bonds,
# drawn as points ("p") on a reference grid ("g")
xyplot(
  solTrainY ~ solTrainX$NumRotBonds,
  type = c("p", "g"),
  xlab = "Number of Rotatable Bonds",
  ylab = "Solubility (log)"
)

# bwplot() makes box-and-whisker plots for numerical variables.
# Solubility is split by whether predictor column 100 equals 1
# (presumably the FP100 fingerprint flag -- confirm with names(solTrainX)[100]).
bwplot(
  solTrainY ~ ifelse(solTrainX[, 100] == 1,
                     "structure present",
                     "structure absent"),
  main = "(b)",
  ylab = "Solubility (log)",
  horizontal = FALSE
)

#The above examples showed that there exist strong correlations among predictors, so how do we deal with significant correlations?

### Find the columns that are not fingerprints (i.e. the continuous
### predictors). grep will return a vector of integers corresponding to
### column names that contain the pattern "FP".

#  1. Write down the least-squares regression equation and circle the results from your outputs.
## We use solTrainXtrans here: the training set predictors after
## transformations for skewness and centering/scaling.
# NOTE: despite its name, notFingerprints holds the positions of the columns
# whose names contain "FP" (the fingerprint columns). It is applied with a
# negative index, which is what keeps the non-fingerprint (continuous) columns.
notFingerprints <- grep(pattern = "FP", x = names(solTrainXtrans))
# Scatter plots of each continuous predictor against solubility, with a
# grid ("g"), points ("p") and a smoother ("smooth") in every panel
featurePlot(
  x = solTrainXtrans[, -notFingerprints],
  y = solTrainY,
  type = c("g", "p", "smooth"),
  between = list(x = 1, y = 1),
  labels = rep("", 2)
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0059402
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.57302
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 2.4347e-15
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.32158
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0059402
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.57302
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.32158
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0059402
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.57302
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.32158
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0059402
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.57302
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.32158
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0059402
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.57302
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 2.4347e-15
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.32158
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0035397
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.46037
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 2.9968e-15
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.20869
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0035397
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.46037
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.20869
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0035397
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.46037
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.20869
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0035397
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.46037
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 6.0399e-16
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.20869
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0035397
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.46037
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 2.9968e-15
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 0.20869
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 5.76e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 5.76e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 5.76e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 5.76e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 5.76e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, : at
## -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## radius 6.1471e-06
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at -0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 0.0024793
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## zero-width neighborhood. make span bigger

#library(corrplot)
#Draw the correlation matrix plot for the continous predictors 
corrplot::corrplot(cor(solTrainXtrans[, -notFingerprints]), 
                   order = "hclust", 
                   tl.cex = .8)

# Flag highly correlated continuous predictors (cutoff 0.9) and redraw the
# correlation plot without them
continuousX <- solTrainXtrans[, -notFingerprints]
tooHigh <- findCorrelation(cor(continuousX), cutoff = 0.9)
corrplot::corrplot(cor(continuousX[, -tooHigh]), order = "hclust", tl.cex = 0.8)

# Identify near-zero-variance predictors. This call only reports their
# column indices; nothing is removed from solTrainXtrans here.
nearZeroVar(solTrainXtrans)
## [1] 154 199 200
#There are three near-zero-variance predictors, at column indices 154, 199 and 200

# Skewness of each continuous predictor (skewness() comes from e1071).
# Iterate over the data frame's columns with vapply() rather than apply(),
# which would first coerce the whole data frame to a matrix; numeric(1)
# enforces one skewness value per column.
vapply(solTrainXtrans[, -notFingerprints], skewness, numeric(1))
##         MolWeight          NumAtoms      NumNonHAtoms          NumBonds 
##     -0.0002162255     -0.0713055864     -0.0555982369     -0.1881635919 
##      NumNonHBonds      NumMultBonds       NumRotBonds       NumDblBonds 
##      0.0432437801     -0.0946655258      0.0973568321      0.1496862360 
##  NumAromaticBonds       NumHydrogen         NumCarbon       NumNitrogen 
##     -0.1463815894     -0.0407015938      0.0616443828      0.4262727429 
##         NumOxygen         NumSulfer       NumChlorine        NumHalogen 
##      0.1870352249      2.2707457390      1.4673236666      1.0331764976 
##          NumRings HydrophilicFactor      SurfaceArea1      SurfaceArea2 
##      0.0055727736      0.1003040295     -0.1316118434     -0.1681548297
# Box-Cox transformation
# BoxCoxTrans() works on a single numeric vector. Handed the whole matrix it
# pools all 951 x 20 = 19020 cells into one sample, and because that pool
# contains values <= 0 no lambda can be estimated. preProcess() with
# method = "BoxCox" estimates lambda column by column instead.
# NOTE(review): columns that contain non-positive values are still expected
# to be left untransformed -- confirm against the printed summary.
Original <- as.matrix(solTrainXtrans[, -notFingerprints])
solTrainXtransBoxCox <- preProcess(as.data.frame(Original), method = "BoxCox")
solTrainXtransBoxCox
## Box-Cox Transformation
## 
## 19020 data points used to estimate Lambda
## 
## Input data summary:
##    MolWeight        NumAtoms      NumNonHAtoms      NumBonds    
##  Min.   :3.852   Min.   :1.792   Min.   :1.099   Min.   :1.609  
##  1st Qu.:4.817   1st Qu.:2.890   1st Qu.:2.197   1st Qu.:2.890  
##  Median :5.194   Median :3.135   Median :2.565   Median :3.178  
##  Mean   :5.199   Mean   :3.174   Mean   :2.549   Mean   :3.176  
##  3rd Qu.:5.581   3rd Qu.:3.466   3rd Qu.:2.890   3rd Qu.:3.481  
##  Max.   :6.503   Max.   :4.554   Max.   :3.871   Max.   :4.585  
##   NumNonHBonds     NumMultBonds     NumRotBonds      NumDblBonds    
##  Min.   :0.7435   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:2.7592   1st Qu.:0.7988   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :3.3514   Median :2.9448   Median :1.0986   Median :0.5671  
##  Mean   :3.3623   Mean   :2.5791   Mean   :0.9256   Mean   :0.3981  
##  3rd Qu.:4.0099   3rd Qu.:4.0237   3rd Qu.:1.4979   3rd Qu.:0.8045  
##  Max.   :5.9770   Max.   :6.7030   Max.   :2.8332   Max.   :1.1880  
##  NumAromaticBonds  NumHydrogen      NumCarbon       NumNitrogen    
##  Min.   :0.000    Min.   :0.000   Min.   :0.7705   Min.   :0.0000  
##  1st Qu.:0.000    1st Qu.:2.887   1st Qu.:2.6426   1st Qu.:0.0000  
##  Median :1.946    Median :3.691   Median :3.3175   Median :0.0000  
##  Mean   :1.287    Mean   :3.696   Mean   :3.3240   Mean   :0.2308  
##  3rd Qu.:1.946    3rd Qu.:4.465   3rd Qu.:3.8622   3rd Qu.:0.4568  
##  Max.   :3.258    Max.   :7.314   Max.   :6.2678   Max.   :0.7079  
##    NumOxygen        NumSulfer        NumChlorine        NumHalogen    
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.6931   Median :0.00000   Median :0.00000   Median :0.0000  
##  Mean   :0.7470   Mean   :0.04975   Mean   :0.09098   Mean   :0.1201  
##  3rd Qu.:1.0986   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.3750  
##  Max.   :2.6391   Max.   :0.48000   Max.   :0.49587   Max.   :0.4959  
##     NumRings      HydrophilicFactor  SurfaceArea1     SurfaceArea2   
##  Min.   :0.0000   Min.   :-2.8413   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:0.0000   1st Qu.:-1.2510   1st Qu.: 3.837   1st Qu.: 4.159  
##  Median :0.6931   Median :-0.3630   Median : 7.258   Median : 7.760  
##  Mean   :0.7341   Mean   :-0.4528   Mean   : 6.708   Mean   : 7.081  
##  3rd Qu.:1.0986   3rd Qu.: 0.2799   3rd Qu.: 9.854   3rd Qu.:10.500  
##  Max.   :2.0794   Max.   : 3.5338   Max.   :23.020   Max.   :23.020  
## 
## Lambda could not be estimated; no transformation is applied

End of data preprocessing. We work on the transformed predictor matrix, solTrainXtrans, instead of the original training data, solTrainX.

#Model building

#Linear Regression

### Create a control function that will be used across models. We
### create the fold assignments explicitly instead of relying on the
### random number seed being set to identical values.

# Fix the RNG so the fold assignment is reproducible
set.seed(100)

# Build 10 cross-validation folds (k = 10 is the createFolds default);
# returnTrain = TRUE stores the training-row indices of each fold
indx <- createFolds(solTrainY, k = 10, returnTrain = TRUE)

# Resampling settings shared by every train() call: cross-validation on
# exactly the folds created above
ctrl <- trainControl(index = indx, method = "cv")

### Ordinary linear regression on the full predictor set. Expect
### 'rank-deficient fit' warnings: the predictors are so strongly
### correlated that some of the math has broken down.

set.seed(100)
lmTune0 <- train(
  x = solTrainXtrans,
  y = solTrainY,
  method = "lm",
  trControl = ctrl
)
## Warning in predict.lm(modelFit, newdata): prediction from rank-deficient fit;
## attr(*, "non-estim") has doubtful cases
## Warning in predict.lm(modelFit, newdata): prediction from rank-deficient fit;
## attr(*, "non-estim") has doubtful cases
# Print the cross-validated performance (RMSE, R-squared, MAE) of the full linear model
lmTune0
## Linear Regression 
## 
## 951 samples
## 228 predictors
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ... 
## Resampling results:
## 
##   RMSE       Rsquared   MAE      
##   0.7170016  0.8792751  0.5298775
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
summary(lmTune0) # coefficient table, residual summary and fit statistics of the final lm fit
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.75620 -0.28304  0.01165  0.30030  1.54887 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.431e+00  2.162e+00   1.124 0.261303    
## FP001              3.594e-01  3.185e-01   1.128 0.259635    
## FP002              1.456e-01  2.637e-01   0.552 0.580960    
## FP003             -3.969e-02  1.314e-01  -0.302 0.762617    
## FP004             -3.049e-01  1.371e-01  -2.223 0.026520 *  
## FP005              2.837e+00  9.598e-01   2.956 0.003223 ** 
## FP006             -6.886e-02  2.041e-01  -0.337 0.735917    
## FP007              4.044e-02  1.152e-01   0.351 0.725643    
## FP008              1.121e-01  1.636e-01   0.685 0.493331    
## FP009             -8.242e-01  8.395e-01  -0.982 0.326536    
## FP010              4.193e-01  3.136e-01   1.337 0.181579    
## FP011              5.158e-02  2.198e-01   0.235 0.814503    
## FP012             -1.346e-02  1.611e-01  -0.084 0.933452    
## FP013             -4.519e-01  5.473e-01  -0.826 0.409311    
## FP014              3.281e-01  4.550e-01   0.721 0.471044    
## FP015             -1.839e-01  1.521e-01  -1.209 0.226971    
## FP016             -1.367e-01  1.548e-01  -0.883 0.377340    
## FP017             -1.704e-01  1.386e-01  -1.230 0.219187    
## FP018             -3.824e-01  2.388e-01  -1.602 0.109655    
## FP019             -3.131e-01  3.863e-01  -0.811 0.417862    
## FP020              2.072e-01  2.135e-01   0.971 0.332078    
## FP021             -5.956e-02  2.632e-01  -0.226 0.821060    
## FP022              2.336e-01  3.456e-01   0.676 0.499180    
## FP023             -3.193e-01  1.909e-01  -1.672 0.094866 .  
## FP024             -4.272e-01  2.827e-01  -1.511 0.131162    
## FP025              4.376e-01  4.538e-01   0.964 0.335184    
## FP026              2.068e-01  2.564e-01   0.806 0.420273    
## FP027              2.424e-01  2.429e-01   0.998 0.318594    
## FP028              1.070e-01  1.200e-01   0.892 0.372547    
## FP029             -9.857e-02  2.199e-01  -0.448 0.654163    
## FP030             -2.361e-01  2.468e-01  -0.957 0.339048    
## FP031              8.690e-02  1.346e-01   0.646 0.518754    
## FP032             -1.204e+00  7.772e-01  -1.550 0.121628    
## FP033              5.766e-01  4.236e-01   1.361 0.173882    
## FP034             -1.794e-01  2.618e-01  -0.685 0.493486    
## FP035             -2.140e-01  1.704e-01  -1.256 0.209605    
## FP036              7.701e-02  1.657e-01   0.465 0.642133    
## FP037              1.098e-01  1.725e-01   0.636 0.524693    
## FP038              2.721e-01  1.888e-01   1.441 0.150030    
## FP039              2.011e-02  2.888e-01   0.070 0.944491    
## FP040              5.477e-01  1.890e-01   2.898 0.003873 ** 
## FP041             -4.265e-01  3.004e-01  -1.420 0.156143    
## FP042             -9.901e-01  7.078e-01  -1.399 0.162294    
## FP043             -3.725e-02  2.096e-01  -0.178 0.859011    
## FP044             -3.860e-01  2.184e-01  -1.768 0.077562 .  
## FP045              2.120e-01  1.299e-01   1.631 0.103238    
## FP046             -3.504e-02  2.733e-01  -0.128 0.898010    
## FP047             -1.675e-02  1.414e-01  -0.118 0.905775    
## FP048              2.610e-01  2.434e-01   1.073 0.283810    
## FP049              1.241e-01  1.971e-01   0.630 0.529036    
## FP050              9.087e-03  1.410e-01   0.064 0.948648    
## FP051              1.050e-01  2.014e-01   0.521 0.602210    
## FP052             -4.569e-01  2.482e-01  -1.841 0.066029 .  
## FP053              2.994e-01  2.466e-01   1.214 0.225129    
## FP054              2.734e-02  1.829e-01   0.149 0.881229    
## FP055             -3.662e-01  1.970e-01  -1.858 0.063530 .  
## FP056             -2.961e-01  2.979e-01  -0.994 0.320541    
## FP057             -1.002e-01  1.379e-01  -0.727 0.467703    
## FP058              3.100e-01  8.074e-01   0.384 0.701129    
## FP059             -1.615e-01  1.690e-01  -0.956 0.339514    
## FP060              2.350e-01  1.474e-01   1.595 0.111209    
## FP061             -6.365e-01  1.440e-01  -4.421 1.13e-05 ***
## FP062             -5.224e-01  2.961e-01  -1.764 0.078078 .  
## FP063             -2.001e+00  1.287e+00  -1.554 0.120553    
## FP064              2.549e-01  1.221e-01   2.087 0.037207 *  
## FP065             -2.844e-01  1.197e-01  -2.377 0.017714 *  
## FP066              2.093e-01  1.264e-01   1.655 0.098301 .  
## FP067             -1.406e-01  1.540e-01  -0.913 0.361631    
## FP068              4.964e-01  2.028e-01   2.447 0.014630 *  
## FP069              1.324e-01  8.824e-02   1.501 0.133885    
## FP070              3.453e-03  8.088e-02   0.043 0.965963    
## FP071              1.474e-01  1.237e-01   1.192 0.233775    
## FP072             -9.773e-01  2.763e-01  -3.537 0.000431 ***
## FP073             -4.671e-01  2.072e-01  -2.254 0.024474 *  
## FP074              1.793e-01  1.206e-01   1.487 0.137566    
## FP075              1.231e-01  1.035e-01   1.188 0.235034    
## FP076              5.166e-01  1.704e-01   3.031 0.002525 ** 
## FP077              1.644e-01  1.236e-01   1.331 0.183739    
## FP078             -3.715e-01  1.588e-01  -2.339 0.019608 *  
## FP079              4.254e-01  1.881e-01   2.262 0.023992 *  
## FP080              3.101e-01  1.554e-01   1.996 0.046340 *  
## FP081             -3.208e-01  1.117e-01  -2.873 0.004192 ** 
## FP082              1.243e-01  9.524e-02   1.305 0.192379    
## FP083             -6.916e-01  2.134e-01  -3.241 0.001248 ** 
## FP084              3.626e-01  2.381e-01   1.523 0.128171    
## FP085             -3.310e-01  1.428e-01  -2.317 0.020785 *  
## FP086              1.169e-02  9.774e-02   0.120 0.904834    
## FP087              4.559e-02  2.797e-01   0.163 0.870568    
## FP088              2.416e-01  9.959e-02   2.425 0.015534 *  
## FP089              5.999e-01  2.320e-01   2.586 0.009915 ** 
## FP090             -2.450e-02  1.154e-01  -0.212 0.831930    
## FP091             -2.858e-01  3.185e-01  -0.897 0.369847    
## FP092              2.665e-01  2.069e-01   1.288 0.198156    
## FP093              1.974e-01  1.087e-01   1.816 0.069803 .  
## FP094             -1.991e-01  1.441e-01  -1.381 0.167707    
## FP095             -1.403e-01  1.124e-01  -1.248 0.212449    
## FP096             -5.024e-01  1.459e-01  -3.445 0.000605 ***
## FP097             -2.635e-01  1.666e-01  -1.582 0.114020    
## FP098             -2.865e-01  1.633e-01  -1.754 0.079863 .  
## FP099              2.592e-01  2.568e-01   1.009 0.313136    
## FP100             -4.008e-01  3.034e-01  -1.321 0.186949    
## FP101             -1.760e-01  3.019e-01  -0.583 0.560147    
## FP102              2.445e-01  3.449e-01   0.709 0.478579    
## FP103             -1.493e-01  9.148e-02  -1.632 0.103176    
## FP104             -1.428e-01  1.176e-01  -1.214 0.225238    
## FP105             -6.912e-02  1.395e-01  -0.495 0.620482    
## FP106              1.128e-01  1.288e-01   0.876 0.381495    
## FP107              2.778e+00  8.247e-01   3.369 0.000796 ***
## FP108              8.836e-03  1.852e-01   0.048 0.961970    
## FP109              8.200e-01  2.267e-01   3.617 0.000319 ***
## FP110              3.680e-01  3.311e-01   1.111 0.266811    
## FP111             -5.565e-01  1.420e-01  -3.918 9.80e-05 ***
## FP112             -1.079e-01  2.705e-01  -0.399 0.690108    
## FP113              1.511e-01  9.481e-02   1.594 0.111478    
## FP114             -1.201e-01  1.891e-01  -0.635 0.525628    
## FP115             -1.896e-01  1.405e-01  -1.349 0.177736    
## FP116              7.778e-03  1.897e-01   0.041 0.967300    
## FP117              2.583e-01  1.779e-01   1.452 0.147070    
## FP118             -1.964e-01  1.230e-01  -1.596 0.110940    
## FP119              7.515e-01  2.630e-01   2.857 0.004402 ** 
## FP120             -1.814e-01  1.794e-01  -1.011 0.312362    
## FP121             -4.731e-02  3.957e-01  -0.120 0.904866    
## FP122              1.048e-01  1.041e-01   1.007 0.314268    
## FP123              3.926e-02  1.765e-01   0.222 0.824066    
## FP124              1.235e-01  1.705e-01   0.724 0.469243    
## FP125             -2.633e-04  1.151e-01  -0.002 0.998175    
## FP126             -2.782e-01  1.177e-01  -2.363 0.018373 *  
## FP127             -6.123e-01  1.739e-01  -3.521 0.000457 ***
## FP128             -5.424e-01  1.932e-01  -2.807 0.005136 ** 
## FP129             -6.731e-02  2.243e-01  -0.300 0.764167    
## FP130             -1.034e+00  4.106e-01  -2.518 0.012009 *  
## FP131              2.158e-01  1.617e-01   1.335 0.182405    
## FP132             -1.976e-01  2.382e-01  -0.830 0.406998    
## FP133             -1.573e-01  1.217e-01  -1.293 0.196319    
## FP134              2.496e+00  1.196e+00   2.086 0.037310 *  
## FP135              1.818e-01  1.319e-01   1.379 0.168460    
## FP136             -7.763e-02  3.131e-01  -0.248 0.804237    
## FP137             -4.613e-02  2.978e-01  -0.155 0.876947    
## FP138             -9.392e-02  1.906e-01  -0.493 0.622251    
## FP139              7.659e-02  4.063e-01   0.189 0.850517    
## FP140              3.145e-01  2.149e-01   1.463 0.143784    
## FP141              2.219e-01  2.765e-01   0.802 0.422532    
## FP142              6.272e-01  1.488e-01   4.214 2.83e-05 ***
## FP143              9.981e-01  2.929e-01   3.407 0.000692 ***
## FP144              2.207e-01  2.839e-01   0.777 0.437195    
## FP145             -1.146e-01  1.188e-01  -0.964 0.335169    
## FP146             -2.324e-01  2.086e-01  -1.114 0.265716    
## FP147              1.502e-01  1.228e-01   1.223 0.221703    
## FP148             -1.600e-01  1.319e-01  -1.213 0.225560    
## FP149              1.172e-01  1.650e-01   0.710 0.477770    
## FP150              9.046e-02  1.577e-01   0.574 0.566368    
## FP151              2.899e-01  3.120e-01   0.929 0.353202    
## FP152             -2.544e-01  2.990e-01  -0.851 0.395087    
## FP153             -3.765e-01  2.773e-01  -1.358 0.175029    
## FP154             -1.027e+00  2.033e-01  -5.054 5.50e-07 ***
## FP155              4.888e-01  2.916e-01   1.676 0.094163 .  
## FP156             -3.602e-02  3.636e-01  -0.099 0.921109    
## FP157             -4.715e-01  2.468e-01  -1.910 0.056505 .  
## FP158              1.669e-02  1.925e-01   0.087 0.930943    
## FP159              1.800e-01  2.432e-01   0.740 0.459378    
## FP160              1.525e-02  2.177e-01   0.070 0.944155    
## FP161             -2.440e-01  1.433e-01  -1.703 0.089063 .  
## FP162              4.910e-02  1.859e-01   0.264 0.791710    
## FP163              4.785e-01  3.121e-01   1.533 0.125659    
## FP164              5.096e-01  1.899e-01   2.684 0.007446 ** 
## FP165              5.793e-01  2.146e-01   2.700 0.007103 ** 
## FP166             -6.582e-02  2.185e-01  -0.301 0.763293    
## FP167             -6.044e-01  2.515e-01  -2.403 0.016502 *  
## FP168             -1.187e-01  1.872e-01  -0.634 0.526173    
## FP169             -1.705e-01  8.312e-02  -2.051 0.040650 *  
## FP170             -7.902e-02  1.560e-01  -0.506 0.612745    
## FP171              4.651e-01  1.186e-01   3.922 9.64e-05 ***
## FP172             -4.426e-01  2.440e-01  -1.814 0.070120 .  
## FP173              4.243e-01  1.657e-01   2.561 0.010634 *  
## FP174             -1.010e-01  2.098e-01  -0.481 0.630311    
## FP175             -4.657e-02  2.481e-01  -0.188 0.851136    
## FP176              9.736e-01  2.644e-01   3.682 0.000249 ***
## FP177              1.386e-01  2.393e-01   0.579 0.562538    
## FP178              6.497e-02  2.079e-01   0.313 0.754691    
## FP179             -3.415e-02  2.232e-01  -0.153 0.878437    
## FP180             -7.905e-01  5.523e-01  -1.431 0.152839    
## FP181              4.925e-01  3.218e-01   1.531 0.126309    
## FP182             -1.124e-01  1.310e-01  -0.858 0.391384    
## FP183              2.998e-01  7.143e-01   0.420 0.674836    
## FP184              4.876e-01  1.580e-01   3.087 0.002103 ** 
## FP185             -3.778e-01  2.037e-01  -1.854 0.064108 .  
## FP186             -3.654e-01  1.953e-01  -1.871 0.061710 .  
## FP187              4.457e-01  2.682e-01   1.662 0.097015 .  
## FP188              1.475e-01  1.258e-01   1.172 0.241519    
## FP189             -1.984e-02  3.468e-01  -0.057 0.954384    
## FP190              2.629e-01  3.018e-01   0.871 0.383981    
## FP191              2.799e-01  1.465e-01   1.911 0.056388 .  
## FP192             -2.404e-01  2.751e-01  -0.874 0.382534    
## FP193              1.502e-01  1.494e-01   1.005 0.315159    
## FP194              8.029e-01  6.379e-01   1.259 0.208566    
## FP195              5.967e-02  3.435e-01   0.174 0.862158    
## FP196              1.091e-02  2.544e-01   0.043 0.965812    
## FP197             -3.736e-02  1.569e-01  -0.238 0.811793    
## FP198              1.896e-01  2.665e-01   0.712 0.476893    
## FP199             -9.932e-02  1.797e-01  -0.553 0.580702    
## FP200             -6.421e-02  2.161e-01  -0.297 0.766462    
## FP201             -4.838e-01  1.980e-01  -2.444 0.014771 *  
## FP202              5.664e-01  1.869e-01   3.031 0.002527 ** 
## FP203              2.586e-01  6.447e-01   0.401 0.688462    
## FP204             -1.371e-01  2.543e-01  -0.539 0.590008    
## FP205              7.177e-02  1.561e-01   0.460 0.645857    
## FP206             -6.769e-02  1.860e-01  -0.364 0.716094    
## FP207             -5.538e-03  2.060e-01  -0.027 0.978560    
## FP208             -5.338e-01  6.324e-01  -0.844 0.398925    
## MolWeight         -1.232e+00  2.296e-01  -5.365 1.09e-07 ***
## NumAtoms          -1.478e+01  3.473e+00  -4.257 2.35e-05 ***
## NumNonHAtoms       1.795e+01  3.166e+00   5.670 2.07e-08 ***
## NumBonds           9.843e+00  2.681e+00   3.671 0.000260 ***
## NumNonHBonds      -1.030e+01  1.793e+00  -5.746 1.35e-08 ***
## NumMultBonds       2.107e-01  1.754e-01   1.201 0.229990    
## NumRotBonds       -5.213e-01  1.334e-01  -3.908 0.000102 ***
## NumDblBonds       -7.492e-01  3.163e-01  -2.369 0.018111 *  
## NumAromaticBonds  -2.364e+00  6.232e-01  -3.794 0.000161 ***
## NumHydrogen        8.347e-01  1.880e-01   4.439 1.04e-05 ***
## NumCarbon          1.730e-02  3.763e-01   0.046 0.963335    
## NumNitrogen        6.125e+00  3.045e+00   2.011 0.044645 *  
## NumOxygen          2.389e+00  4.523e-01   5.283 1.69e-07 ***
## NumSulfer         -8.508e+00  3.619e+00  -2.351 0.018994 *  
## NumChlorine       -7.449e+00  1.989e+00  -3.744 0.000195 ***
## NumHalogen         1.408e+00  2.109e+00   0.668 0.504615    
## NumRings           1.276e+00  6.716e-01   1.901 0.057731 .  
## HydrophilicFactor  1.099e-02  1.137e-01   0.097 0.922998    
## SurfaceArea1       8.825e-02  6.058e-02   1.457 0.145643    
## SurfaceArea2       9.555e-02  5.615e-02   1.702 0.089208 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5524 on 722 degrees of freedom
## Multiple R-squared:  0.9446, Adjusted R-squared:  0.9271 
## F-statistic: 54.03 on 228 and 722 DF,  p-value: < 2.2e-16
### Collect the observed test-set outcomes together with the full linear
### model's test-set predictions
lmPred0 <- predict(lmTune0, newdata = solTestXtrans)
testResults <- data.frame(obs = solTestY, Linear_Regression = lmPred0)

### And another using a set of predictors reduced by unsupervised
### filtering. We apply a filter to reduce extreme between-predictor
### correlations. Note the lack of warnings.

# Columns flagged by findCorrelation() at a correlation cutoff of 0.9
tooHigh <- findCorrelation(cor(solTrainXtrans), cutoff = 0.9)

# Select the columns to keep by positive index: x[, -tooHigh] would drop
# every column if tooHigh were empty. drop = FALSE keeps a data frame even
# when only one column survives. The same column positions are applied to
# the training and test predictors.
keepCols <- setdiff(seq_len(ncol(solTrainXtrans)), tooHigh)
trainXfiltered <- solTrainXtrans[, keepCols, drop = FALSE]
testXfiltered <- solTestXtrans[, keepCols, drop = FALSE]

set.seed(100)
lmTune <- train(x = trainXfiltered, y = solTrainY,
                method = "lm",
                trControl = ctrl)

# Print the cross-validated performance of the filtered model
lmTune
## Linear Regression 
## 
## 951 samples
## 190 predictors
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ... 
## Resampling results:
## 
##   RMSE       Rsquared   MAE    
##   0.7112743  0.8805826  0.53382
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
### Save the filtered model's test set results in a data frame.
### lmTune was fit on trainXfiltered, so predict on the matching filtered
### test predictors (testXfiltered) rather than the unfiltered test set.
testResults1 <- data.frame(obs = solTestY,
                           Linear_Regression = predict(lmTune, testXfiltered))

Partial Least Squares (PLS) and Principal Component Regression (PCR)

# Fit PLS to the solubility data (PCR follows, for comparison).
# method = "pls" relies on the pls package loaded at the top.
componentGrid <- expand.grid(ncomp = 1:50) # candidate numbers of components
set.seed(100)
plsTune <- train(
  x = solTrainXtrans,
  y = solTrainY,
  method = "pls",
  trControl = ctrl,
  tuneGrid = componentGrid
)
plsTune
## Partial Least Squares 
## 
## 951 samples
## 228 predictors
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ... 
## Resampling results across tuning parameters:
## 
##   ncomp  RMSE       Rsquared   MAE      
##    1     1.7567200  0.2656794  1.4372895
##    2     1.2744189  0.6113687  0.9891602
##    3     1.0369849  0.7435114  0.8042904
##    4     0.8333567  0.8331804  0.6371070
##    5     0.7465669  0.8658913  0.5714154
##    6     0.7126571  0.8780661  0.5479709
##    7     0.6964672  0.8845762  0.5358792
##    8     0.6902700  0.8867464  0.5304327
##    9     0.6866134  0.8878356  0.5276338
##   10     0.6859736  0.8882368  0.5248576
##   11     0.6846168  0.8886602  0.5267211
##   12     0.6836218  0.8891027  0.5190374
##   13     0.6838654  0.8892094  0.5210315
##   14     0.6807350  0.8901425  0.5173906
##   15     0.6826661  0.8896885  0.5183259
##   16     0.6819079  0.8899369  0.5166051
##   17     0.6803845  0.8903528  0.5153813
##   18     0.6835355  0.8894978  0.5191376
##   19     0.6874578  0.8884091  0.5213042
##   20     0.6876702  0.8881465  0.5210240
##   21     0.6906650  0.8871996  0.5211997
##   22     0.6913664  0.8870307  0.5226134
##   23     0.6955030  0.8857345  0.5260189
##   24     0.7004351  0.8842887  0.5303128
##   25     0.7036220  0.8832878  0.5305845
##   26     0.7070175  0.8820762  0.5334096
##   27     0.7099186  0.8810620  0.5354840
##   28     0.7119219  0.8803695  0.5340200
##   29     0.7148858  0.8794524  0.5357332
##   30     0.7179197  0.8784303  0.5372692
##   31     0.7205870  0.8775123  0.5393194
##   32     0.7226853  0.8768418  0.5407595
##   33     0.7228866  0.8767838  0.5398822
##   34     0.7248003  0.8762652  0.5409011
##   35     0.7252532  0.8760801  0.5399395
##   36     0.7248396  0.8762991  0.5394633
##   37     0.7261218  0.8758859  0.5395361
##   38     0.7264810  0.8757825  0.5391117
##   39     0.7275604  0.8754313  0.5392966
##   40     0.7290232  0.8749157  0.5402907
##   41     0.7299098  0.8746315  0.5411541
##   42     0.7303183  0.8744295  0.5409782
##   43     0.7300602  0.8744625  0.5405539
##   44     0.7305048  0.8742995  0.5409475
##   45     0.7305013  0.8742489  0.5413010
##   46     0.7305273  0.8742750  0.5417646
##   47     0.7316553  0.8738556  0.5416701
##   48     0.7318788  0.8737642  0.5424856
##   49     0.7328515  0.8734481  0.5423624
##   50     0.7333185  0.8731901  0.5434880
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was ncomp = 17.
# Plot the PLS tuning results
plot(plsTune)

# Append the PLS test-set predictions to the results table
testResults[["PLS"]] <- predict(plsTune, newdata = solTestXtrans)

# Fit PCR over the same range of 1 to 50 components used for PLS
pcrGrid <- expand.grid(ncomp = 1:50)
set.seed(100)
pcrTune <- train(
  x = solTrainXtrans,
  y = solTrainY,
  method = "pcr",
  trControl = ctrl,
  tuneGrid = pcrGrid
)
pcrTune
## Principal Component Analysis 
## 
## 951 samples
## 228 predictors
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ... 
## Resampling results across tuning parameters:
## 
##   ncomp  RMSE       Rsquared    MAE      
##    1     1.9823303  0.07290019  1.5869212
##    2     1.6398355  0.36081511  1.2810055
##    3     1.3709076  0.55235719  1.0618538
##    4     1.3758521  0.54925157  1.0643075
##    5     1.3450879  0.56844597  1.0301765
##    6     1.2140930  0.64989532  0.9247502
##    7     1.1797702  0.67070995  0.8998390
##    8     1.1408942  0.69159641  0.8737832
##    9     1.0447017  0.74097508  0.8198902
##   10     0.9846736  0.76688176  0.7629658
##   11     0.9682403  0.77484715  0.7481903
##   12     0.9649781  0.77613899  0.7452582
##   13     0.9511715  0.78261820  0.7380502
##   14     0.9382676  0.78854541  0.7304692
##   15     0.9391399  0.78813473  0.7305274
##   16     0.8683560  0.81866216  0.6639231
##   17     0.8697473  0.81806492  0.6647760
##   18     0.8710394  0.81743620  0.6652573
##   19     0.8706819  0.81742338  0.6661038
##   20     0.8615899  0.82129313  0.6585646
##   21     0.8104091  0.84240747  0.6215034
##   22     0.8094659  0.84279178  0.6219602
##   23     0.8051823  0.84437865  0.6193311
##   24     0.8067893  0.84391465  0.6198146
##   25     0.8087502  0.84310259  0.6187339
##   26     0.8038728  0.84500751  0.6168930
##   27     0.8027071  0.84564671  0.6161959
##   28     0.8017716  0.84596907  0.6161530
##   29     0.7851503  0.85225581  0.6036835
##   30     0.7837446  0.85266381  0.6011218
##   31     0.7809536  0.85370837  0.5987260
##   32     0.7698948  0.85771024  0.5920064
##   33     0.7419912  0.86777296  0.5723061
##   34     0.7362850  0.86997471  0.5686044
##   35     0.7343025  0.87073561  0.5656401
##   36     0.7302412  0.87227541  0.5620624
##   37     0.7252496  0.87428725  0.5584212
##   38     0.7253938  0.87429060  0.5573722
##   39     0.7247813  0.87453779  0.5569706
##   40     0.7257744  0.87419673  0.5573496
##   41     0.7240053  0.87464218  0.5564440
##   42     0.7250126  0.87432020  0.5568892
##   43     0.7236723  0.87484057  0.5567977
##   44     0.7259451  0.87402482  0.5581918
##   45     0.7229274  0.87512646  0.5544893
##   46     0.7222965  0.87523134  0.5530671
##   47     0.7227040  0.87512338  0.5539032
##   48     0.7217612  0.87536963  0.5530784
##   49     0.7213154  0.87550719  0.5539427
##   50     0.7223208  0.87520400  0.5543803
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was ncomp = 49.
# Plot the PCR tuning results
plot(pcrTune)

# Stack the PLS and PCR resampling summaries, labelling each row with the
# model it came from
pcrResamples <- pcrTune$results
pcrResamples[["Model"]] <- "PCR"
plsResamples <- plsTune$results
plsResamples[["Model"]] <- "PLS"
plsPlotData <- rbind(plsResamples, pcrResamples)

# Cross-validated RMSE against the number of components, one series per model
xyplot(RMSE ~ ncomp,
       data = plsPlotData,
       groups = Model,
       type = c("o", "g"),
       xlab = "# Components",
       ylab = "RMSE (Cross-Validation)",
       auto.key = list(columns = 2))

# Unscaled predictor-importance scores for the PLS and PCR models
plsImp <- varImp(plsTune, scale = FALSE)
pcrImp <- varImp(pcrTune, scale = FALSE)

# Plot the top 25 predictors for each model with identical axis settings
importanceScales <- list(y = list(cex = .95))
plot(plsImp, top = 25, scales = importanceScales)
plot(pcrImp, top = 25, scales = importanceScales)

Penalized Models

# The text used the elasticnet to obtain a ridge regression model.
## There is now a simple ridge regression method.

# Candidate ridge penalties: 10 equally spaced lambda values from 0 to 0.1.
# You may need to try different ranges of values for lambda.
# length.out is spelled out in full; "length" only worked through partial
# argument matching.
ridgeGrid <- expand.grid(lambda = seq(0, .1, length.out = 10))


### Ridge regression tuned over the lambda values in ridgeGrid.
### This fit is slow; the timing printed below will vary with your CPU.
set.seed(100)

### Start the clock to track time!
ptm <- proc.time()
# method = "ridge" relies on the elasticnet package (loaded at the top)
ridgeTune <- train(x = solTrainXtrans, y = solTrainY,
                   method = "ridge",
                   tuneGrid = ridgeGrid,
                   trControl = ctrl,
                   # full argument name; "preProc" only worked through
                   # partial argument matching
                   preProcess = c("center", "scale"))

### Stop the clock: user/system/elapsed seconds spent in the fit
proc.time() - ptm
##    user  system elapsed 
##   94.00   20.07  129.33
# Print the resampled RMSE, R-squared and MAE for each lambda and the value selected
ridgeTune
## Ridge Regression 
## 
## 951 samples
## 228 predictors
## 
## Pre-processing: centered (228), scaled (228) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ... 
## Resampling results across tuning parameters:
## 
##   lambda      RMSE       Rsquared   MAE      
##   0.00000000  0.7169416  0.8793300  0.5298092
##   0.01111111  0.6913735  0.8869082  0.5232301
##   0.02222222  0.6860086  0.8887434  0.5217135
##   0.03333333  0.6861042  0.8889600  0.5236471
##   0.04444444  0.6884148  0.8885888  0.5269698
##   0.05555556  0.6919052  0.8879560  0.5305611
##   0.06666667  0.6961413  0.8871974  0.5343997
##   0.07777778  0.7009117  0.8863787  0.5385735
##   0.08888889  0.7061028  0.8855344  0.5430394
##   0.10000000  0.7116491  0.8846838  0.5480085
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was lambda = 0.02222222.
# List the names of the components stored in the ridgeTune object
names(ridgeTune)
##  [1] "method"       "modelInfo"    "modelType"    "results"      "pred"        
##  [6] "bestTune"     "call"         "dots"         "metric"       "control"     
## [11] "finalModel"   "preProcess"   "trainingData" "ptype"        "resample"    
## [16] "resampledCM"  "perfNames"    "maximize"     "yLimits"      "times"       
## [21] "levels"
# Prints a Length/Class/Mode table of the fitted model object's components
# (not a coefficient table)
summary(ridgeTune)
##             Length Class      Mode     
## call            4  -none-     call     
## actions       261  -none-     list     
## allset        228  -none-     numeric  
## beta.pure   59508  -none-     numeric  
## vn            228  -none-     character
## mu              1  -none-     numeric  
## normx         228  -none-     numeric  
## meanx         228  -none-     numeric  
## lambda          1  -none-     numeric  
## L1norm        261  -none-     numeric  
## penalty       261  -none-     numeric  
## df            261  -none-     numeric  
## Cp            261  -none-     numeric  
## sigma2          1  -none-     numeric  
## xNames        228  -none-     character
## problemType     1  -none-     character
## tuneValue       1  data.frame list     
## obsLevels       1  -none-     logical  
## param           0  -none-     list
# Predict the test-set response from solTestXtrans with the tuned ridge model
# and store it as the Ridge column of testResults
testResults$Ridge <- predict(ridgeTune, solTestXtrans)

ENET

# Tune the elastic net over a lambda x fraction grid.
# ptm records the start time so the elapsed tuning time can be printed below.
ptm <- proc.time()
# 3 lambda values x 20 fraction values (0.05, 0.10, ..., 1) = 60 candidates
enetGrid <- expand.grid(lambda = c(0, 0.01, 0.1),
                        fraction = seq(0.05, 1, length.out = 20))
# Fix the RNG state before tuning
set.seed(100)
# Argument names are spelled out in full (preProcess, length.out) so the calls
# do not depend on partial argument matching.
enetTune <- train(x = solTrainXtrans, y = solTrainY,
                  method = "enet",
                  tuneGrid = enetGrid,
                  trControl = ctrl,
                  preProcess = c("center", "scale"))
proc.time() - ptm
##    user  system elapsed 
##   31.66    6.41   50.28
### Stop the clock
#> proc.time() - ptm
#   user  system elapsed 
#  34.31    0.48   34.89 


# Print the resampling results for every lambda/fraction pair in the grid and
# the pair that was selected
enetTune
## Elasticnet 
## 
## 951 samples
## 228 predictors
## 
## Pre-processing: centered (228), scaled (228) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 856, 855, 857, 856, 856, 855, ... 
## Resampling results across tuning parameters:
## 
##   lambda  fraction  RMSE       Rsquared   MAE      
##   0.00    0.05      0.8682135  0.8364159  0.6582205
##   0.00    0.10      0.6917546  0.8861422  0.5272791
##   0.00    0.15      0.6784548  0.8902877  0.5179525
##   0.00    0.20      0.6774425  0.8907722  0.5158738
##   0.00    0.25      0.6860148  0.8881153  0.5177716
##   0.00    0.30      0.6910045  0.8865193  0.5205346
##   0.00    0.35      0.6966432  0.8848125  0.5246741
##   0.00    0.40      0.7023375  0.8830881  0.5278990
##   0.00    0.45      0.7052740  0.8822021  0.5285138
##   0.00    0.50      0.7059831  0.8820328  0.5284400
##   0.00    0.55      0.7065024  0.8819233  0.5281518
##   0.00    0.60      0.7065114  0.8820017  0.5275396
##   0.00    0.65      0.7074118  0.8818045  0.5272275
##   0.00    0.70      0.7082961  0.8815922  0.5270556
##   0.00    0.75      0.7091942  0.8813839  0.5269874
##   0.00    0.80      0.7102416  0.8811257  0.5270035
##   0.00    0.85      0.7114872  0.8808167  0.5274405
##   0.00    0.90      0.7130500  0.8804047  0.5281161
##   0.00    0.95      0.7149097  0.8798966  0.5288774
##   0.00    1.00      0.7169416  0.8793300  0.5298092
##   0.01    0.05      1.5159564  0.6476354  1.1634495
##   0.01    0.10      1.1319979  0.7709849  0.8660515
##   0.01    0.15      0.9064708  0.8256600  0.6871343
##   0.01    0.20      0.7865777  0.8580364  0.6008124
##   0.01    0.25      0.7294129  0.8746693  0.5563058
##   0.01    0.30      0.7005759  0.8833681  0.5351833
##   0.01    0.35      0.6893917  0.8866891  0.5271555
##   0.01    0.40      0.6841276  0.8883835  0.5239676
##   0.01    0.45      0.6808591  0.8894713  0.5217575
##   0.01    0.50      0.6788489  0.8901183  0.5200594
##   0.01    0.55      0.6765076  0.8909486  0.5182244
##   0.01    0.60      0.6760440  0.8911677  0.5171115
##   0.01    0.65      0.6773673  0.8908049  0.5173328
##   0.01    0.70      0.6789357  0.8903792  0.5176779
##   0.01    0.75      0.6806104  0.8899249  0.5181852
##   0.01    0.80      0.6819248  0.8895841  0.5183361
##   0.01    0.85      0.6836298  0.8891156  0.5190363
##   0.01    0.90      0.6861949  0.8883789  0.5203764
##   0.01    0.95      0.6892478  0.8874946  0.5219250
##   0.01    1.00      0.6925607  0.8865235  0.5237541
##   0.10    0.05      1.6865021  0.5163010  1.2948592
##   0.10    0.10      1.4048400  0.6996155  1.0753819
##   0.10    0.15      1.1687872  0.7629706  0.8924411
##   0.10    0.20      1.0067836  0.7907592  0.7641578
##   0.10    0.25      0.8947633  0.8229558  0.6770513
##   0.10    0.30      0.8201908  0.8439511  0.6246190
##   0.10    0.35      0.7759957  0.8570273  0.5971968
##   0.10    0.40      0.7515618  0.8651763  0.5779286
##   0.10    0.45      0.7330964  0.8719902  0.5628857
##   0.10    0.50      0.7239567  0.8758243  0.5577499
##   0.10    0.55      0.7191286  0.8780581  0.5554838
##   0.10    0.60      0.7168852  0.8794061  0.5549866
##   0.10    0.65      0.7147614  0.8807592  0.5536802
##   0.10    0.70      0.7142090  0.8815578  0.5529367
##   0.10    0.75      0.7136933  0.8822009  0.5517154
##   0.10    0.80      0.7129726  0.8828558  0.5505901
##   0.10    0.85      0.7125100  0.8833964  0.5497964
##   0.10    0.90      0.7121827  0.8838695  0.5491697
##   0.10    0.95      0.7119464  0.8842824  0.5484685
##   0.10    1.00      0.7116491  0.8846838  0.5480085
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were fraction = 0.6 and lambda = 0.01.
# List the components stored in the tuning result enetTune
names(enetTune)
##  [1] "method"       "modelInfo"    "modelType"    "results"      "pred"        
##  [6] "bestTune"     "call"         "dots"         "metric"       "control"     
## [11] "finalModel"   "preProcess"   "trainingData" "ptype"        "resample"    
## [16] "resampledCM"  "perfNames"    "maximize"     "yLimits"      "times"       
## [21] "levels"
# Predict the test-set response from solTestXtrans with the tuned elastic net
# and store it as the ENET column of testResults
testResults$ENET <- predict(enetTune, solTestXtrans)

# Which model has the best predictive ability on the test set?

# Test-set metrics for one vector of predictions `pred` against the observed
# values `obs`: squared correlation (R2), root mean squared error (RMSE) and
# mean absolute error (MAE). Returns a named numeric vector of length 3.
testMetrics <- function(pred, obs) {
  c(
    R2 = cor(pred, obs)^2,
    RMSE = sqrt(mean((pred - obs)^2)),
    MAE = mean(abs(pred - obs))
  )
}

# Models to compare. The list names are used both as the testResults column
# names and as the row names of `results`.
tunedModels <- list(
  LRM = lmTune0,    # linear regression model
  PCR = pcrTune,    # PCR
  PLS = plsTune,    # PLS
  Ridge = ridgeTune, # ridge regression
  ENET = enetTune   # ENET regression
)

# Predict the test set with every model and keep the predictions in testResults
for (modelName in names(tunedModels)) {
  testResults[[modelName]] <- predict(tunedModels[[modelName]], solTestXtrans)
}

# vapply() returns a metrics-by-models matrix; transpose it so that each row is
# a model and the columns are R2, RMSE and MAE.
results <- t(vapply(
  names(tunedModels),
  function(modelName) testMetrics(testResults[[modelName]], solTestY),
  numeric(3)
))
results
##              R2      RMSE       MAE
## LRM   0.8722236 0.7455802 0.5497605
## PCR   0.8551094 0.7920149 0.6030362
## PLS   0.8805903 0.7192161 0.5401932
## Ridge 0.8801309 0.7215616 0.5369285
## ENET  0.8841315 0.7072226 0.5300267

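We conclude that the elastic net (ENET) has the best predictive ability on the test set: it attains the lowest RMSE (0.707) and MAE (0.530), as well as the highest R2 (0.884).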