6.2

(a)

## Loading required package: lattice
## Loading required package: ggplot2
## Loading required package: Rcpp
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.6, built: 2019-11-24)
## ## Copyright (C) 2005-2020 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##

(b)

The fingerprints dataset contains 165 rows and 1107 columns. Of these, 719 columns have near-zero variance; the remaining 388 columns do not and are kept for modelling.

# Indices of the fingerprint columns that caret flags as near-zero variance.
near0 <- nearZeroVar(x = fingerprints)
# Number of flagged columns.
length(near0)
## [1] 719
# Number of columns that remain once the flagged ones are dropped.
dim(fingerprints[, -near0])[2]
## [1] 388

(c)

The optimal number of components (latent variables) in the PLS model is 10. Together they capture 72.48% of the variation in the predictors and 84.54% of the variation in the outcome variable. The cross-validated R^2 for 10 components is 0.5534189.

# Drop the near-zero-variance fingerprint columns before modelling.
finger_data <- fingerprints[, -near0]

# Reproducible split: 80% of the samples for training, the rest held out.
set.seed(1)
dp <- createDataPartition(permeability, p = 0.8, list = FALSE, times = 1)

xtrain <- finger_data[dp, ]
xtest <- finger_data[-dp, ]
ytrain <- permeability[dp, ]
ytest <- permeability[-dp, ]

# 10-fold cross-validation, used to choose the number of PLS components.
ctrl <- trainControl(method = "cv", number = 10)

# Fit PLS on centered and scaled predictors; tuneLength = 20 asks caret to
# evaluate 20 candidate values of the tuning parameter. The argument is
# spelled out as `preProcess` instead of relying on partial matching.
pmod <- train(
  x = xtrain, y = ytrain,
  method = "pls", tuneLength = 20,
  trControl = ctrl,
  preProcess = c("center", "scale")
)

# Resampling profile across the candidate numbers of components.
plot(pmod)

# Selected tuning value and variance explained by the final model.
pmod$bestTune
summary(pmod$finalModel)
## Data:    X dimension: 133 388 
##  Y dimension: 133 1
## Fit method: oscorespls
## Number of components considered: 10
## TRAINING: % variance explained
##           1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps
## X           21.09    34.70    40.33    45.02    51.49    58.92    62.07
## .outcome    34.57    51.73    59.70    66.80    72.46    76.41    79.79
##           8 comps  9 comps  10 comps
## X           66.69    69.49     72.48
## .outcome    81.40    83.47     84.54

(d)

The test-set R^2 is 0.3274115, which is lower than the R^2 obtained on the training set (0.5534189). The model may be overfitting the training data.

# Score the held-out samples with the tuned PLS model.
predictions <- predict(pmod, newdata = xtest)

# Test-set root mean squared error
RMSE(pred = predictions, obs = ytest)
## [1] 14.6758
# Test-set R-squared
R2(pred = predictions, obs = ytest)
## [1] 0.3274115

(e)

Based on the test-set R^2 values below, the elastic net model appears to give the best fit, followed by ridge regression.

Elastic Net R^2: 0.5260718; Ridge Regression R^2: 0.5174488

# NOTE(review): this re-splits the data with seed 100, whereas the PLS fit
# earlier in the file used seed 1, so the test-set metrics of the models are
# computed on different hold-out samples -- confirm this is intended.
set.seed(100)
dp <- createDataPartition(permeability, p = 0.8, list = FALSE)
xtrain <- finger_data[dp, ]
ytrain <- permeability[dp, ]
xtest <- finger_data[-dp, ]
ytest <- permeability[-dp, ]

# Elastic net tuned with cross-validation, selecting on R-squared rather than
# the default RMSE. The unused `ridgeGrid` that used to be built here was
# removed: the grid is created again right before the ridge fit that needs it.
enetmodel <- train(
  x = xtrain, y = ytrain,
  method = "enet",
  metric = "Rsquared",
  tuneLength = 20,
  trControl = trainControl(method = "cv"),
  preProcess = c("center", "scale", "knnImpute")
)
## Warning: model fit failed for Fold08: lambda=0.0000000, fraction=1 Error in if (zmin < gamhat) { : missing value where TRUE/FALSE needed
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
## There were missing values in resampled performance measures.
enetmodel
## Elasticnet 
## 
## 133 samples
## 388 predictors
## 
## Pre-processing: centered (388), scaled (388), nearest neighbor imputation (388) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 119, 121, 119, 120, 120, 120, ... 
## Resampling results across tuning parameters:
## 
##   lambda        fraction  RMSE          Rsquared   MAE         
##   0.0000000000  0.05          11.73026  0.5112930  8.880953e+00
##   0.0000000000  0.10          10.96060  0.5356455  8.156204e+00
##   0.0000000000  0.15          10.57113  0.5514554  7.781082e+00
##   0.0000000000  0.20          10.46411  0.5488368  7.722965e+00
##   0.0000000000  0.25          10.43059  0.5472689  7.662265e+00
##   0.0000000000  0.30          10.53928  0.5346824  7.712603e+00
##   0.0000000000  0.35          10.62082  0.5278685  7.739882e+00
##   0.0000000000  0.40          10.68500  0.5234071  7.873670e+00
##   0.0000000000  0.45          10.80291  0.5180283  8.019113e+00
##   0.0000000000  0.50          10.84283  0.5167099  8.058093e+00
##   0.0000000000  0.55          10.91289  0.5142547  8.107383e+00
##   0.0000000000  0.60          11.08055  0.5068495  8.252246e+00
##   0.0000000000  0.65          11.22801  0.5017611  8.395168e+00
##   0.0000000000  0.70          11.41195  0.4954862  8.578705e+00
##   0.0000000000  0.75          11.56735  0.4899989  8.715175e+00
##   0.0000000000  0.80          11.74535  0.4825328  8.842491e+00
##   0.0000000000  0.85          11.94446  0.4747595  8.975029e+00
##   0.0000000000  0.90          12.16190  0.4665919  9.122924e+00
##   0.0000000000  0.95          12.40922  0.4572871  9.300176e+00
##   0.0000000000  1.00          12.58892  0.4519220  9.346084e+00
##   0.0001000000  0.05         129.49363  0.2214589  6.781962e+01
##   0.0001000000  0.10         231.65992  0.2410610  1.209907e+02
##   0.0001000000  0.15         318.11068  0.2255764  1.720461e+02
##   0.0001000000  0.20         406.33944  0.2220196  2.255537e+02
##   0.0001000000  0.25         499.61126  0.2128017  2.792479e+02
##   0.0001000000  0.30         596.18842  0.1975489  3.325437e+02
##   0.0001000000  0.35         695.90779  0.1774370  3.863829e+02
##   0.0001000000  0.40         796.35336  0.1644050  4.400536e+02
##   0.0001000000  0.45         897.17326  0.1470067  4.933855e+02
##   0.0001000000  0.50         999.01415  0.1316774  5.462772e+02
##   0.0001000000  0.55        1100.79542  0.1236729  5.987292e+02
##   0.0001000000  0.60        1199.30351  0.1191616  6.491226e+02
##   0.0001000000  0.65        1294.23367  0.1184123  6.977653e+02
##   0.0001000000  0.70        1389.17023  0.1163315  7.462313e+02
##   0.0001000000  0.75        1484.12888  0.1137081  7.948504e+02
##   0.0001000000  0.80        1575.93324  0.1196733  8.416750e+02
##   0.0001000000  0.85        1666.04132  0.1239209  8.876706e+02
##   0.0001000000  0.90        1751.68372  0.1249225  9.315195e+02
##   0.0001000000  0.95        1832.48501  0.1260581  9.730975e+02
##   0.0001000000  1.00        1913.11248  0.1249479  1.014437e+03
##   0.0001467799  0.05        1000.48299  0.2530552  5.423035e+02
##   0.0001467799  0.10        1884.87769  0.2512423  1.027855e+03
##   0.0001467799  0.15        2707.69410  0.2422539  1.481979e+03
##   0.0001467799  0.20        3494.54974  0.2317828  1.914931e+03
##   0.0001467799  0.25        4279.83810  0.2210954  2.345487e+03
##   0.0001467799  0.30        4984.55002  0.2069287  2.728794e+03
##   0.0001467799  0.35        5683.97125  0.1936977  3.102438e+03
##   0.0001467799  0.40        6372.22299  0.1839923  3.458577e+03
##   0.0001467799  0.45        7039.66830  0.1693264  3.792883e+03
##   0.0001467799  0.50        7680.72308  0.1609887  4.086602e+03
##   0.0001467799  0.55        8322.05743  0.1529655  4.357807e+03
##   0.0001467799  0.60        8973.26649  0.1486384  4.740340e+03
##   0.0001467799  0.65        9602.54193  0.1422412  5.093666e+03
##   0.0001467799  0.70       10239.69853  0.1331670  5.433680e+03
##   0.0001467799  0.75       10865.53893  0.1255718  5.764683e+03
##   0.0001467799  0.80       11482.30187  0.1168392  6.080062e+03
##   0.0001467799  0.85       12097.53780  0.1121511  6.394341e+03
##   0.0001467799  0.90       12702.13036  0.1091484  6.700405e+03
##   0.0001467799  0.95       13291.80405  0.1055558  6.995199e+03
##   0.0001467799  1.00       13879.52277  0.1033603  7.288950e+03
##   0.0002154435  0.05       12699.28735  0.3166914  7.629310e+03
##   0.0002154435  0.10       22810.92097  0.3332481  1.381147e+04
##   0.0002154435  0.15       33379.05994  0.3231521  2.009753e+04
##   0.0002154435  0.20       43079.20289  0.3193254  2.598324e+04
##   0.0002154435  0.25       52769.40167  0.3112939  3.189438e+04
##   0.0002154435  0.30       62607.21143  0.2993253  3.785886e+04
##   0.0002154435  0.35       72528.98732  0.2863848  4.386765e+04
##   0.0002154435  0.40       82457.12288  0.2676579  4.987409e+04
##   0.0002154435  0.45       92063.17913  0.2425733  5.568503e+04
##   0.0002154435  0.50      100507.42922  0.2184252  6.095065e+04
##   0.0002154435  0.55      108270.27255  0.1913246  6.588700e+04
##   0.0002154435  0.60      116032.73320  0.1725215  7.080471e+04
##   0.0002154435  0.65      122977.82080  0.1583546  7.533387e+04
##   0.0002154435  0.70      129932.57414  0.1453684  7.977296e+04
##   0.0002154435  0.75      137238.13384  0.1340609  8.432719e+04
##   0.0002154435  0.80      144574.11743  0.1240208  8.882496e+04
##   0.0002154435  0.85      151551.05517  0.1130956  9.312778e+04
##   0.0002154435  0.90      158886.59065  0.1120048  9.762228e+04
##   0.0002154435  0.95      166981.91515  0.1130281  1.024669e+05
##   0.0002154435  1.00      175449.36812  0.1112229  1.074482e+05
##   0.0003162278  0.05          31.30934  0.4607036  1.912979e+01
##   0.0003162278  0.10          43.88456  0.4630086  2.847367e+01
##   0.0003162278  0.15          58.27657  0.4464020  3.903070e+01
##   0.0003162278  0.20          75.05823  0.4236235  5.174140e+01
##   0.0003162278  0.25          92.34641  0.3987150  6.480044e+01
##   0.0003162278  0.30         109.40882  0.3736252  7.710401e+01
##   0.0003162278  0.35         126.67606  0.3437560  8.933810e+01
##   0.0003162278  0.40         146.29682  0.3151006  1.028918e+02
##   0.0003162278  0.45         178.15888  0.2853031  1.229209e+02
##   0.0003162278  0.50         210.92601  0.2588831  1.421932e+02
##   0.0003162278  0.55         244.38817  0.2348774  1.610821e+02
##   0.0003162278  0.60         277.55044  0.2153709  1.793726e+02
##   0.0003162278  0.65         311.01230  0.2008296  1.985066e+02
##   0.0003162278  0.70         342.53784  0.1880802  2.163537e+02
##   0.0003162278  0.75         372.12129  0.1739390  2.327578e+02
##   0.0003162278  0.80         401.32922  0.1663224  2.490218e+02
##   0.0003162278  0.85         430.09411  0.1640584  2.650725e+02
##   0.0003162278  0.90         458.65386  0.1626614  2.808157e+02
##   0.0003162278  0.95         486.81258  0.1600761  2.961530e+02
##   0.0003162278  1.00         514.66187  0.1571215  3.112145e+02
##   0.0004641589  0.05        1449.98992  0.4175101  7.603304e+02
##   0.0004641589  0.10        2816.99025  0.4391738  1.480176e+03
##   0.0004641589  0.15        4142.28405  0.4256555  2.177221e+03
##   0.0004641589  0.20        5460.28428  0.4112536  2.868295e+03
##   0.0004641589  0.25        6779.37064  0.3984380  3.559323e+03
##   0.0004641589  0.30        8098.79019  0.3843961  4.249961e+03
##   0.0004641589  0.35        9394.84411  0.3641405  4.927150e+03
##   0.0004641589  0.40       10672.55710  0.3466935  5.593874e+03
##   0.0004641589  0.45       11989.60269  0.3268408  6.279629e+03
##   0.0004641589  0.50       13362.25602  0.3034455  6.989181e+03
##   0.0004641589  0.55       14686.62687  0.2818788  7.672847e+03
##   0.0004641589  0.60       16003.74729  0.2562062  8.353101e+03
##   0.0004641589  0.65       17321.42590  0.2401037  9.033376e+03
##   0.0004641589  0.70       18636.84667  0.2260205  9.711193e+03
##   0.0004641589  0.75       19917.12225  0.2111692  1.037077e+04
##   0.0004641589  0.80       21180.79170  0.2044938  1.101872e+04
##   0.0004641589  0.85       22438.70500  0.2019479  1.166299e+04
##   0.0004641589  0.90       23701.10393  0.1986715  1.230961e+04
##   0.0004641589  0.95       24966.15560  0.1952596  1.295612e+04
##   0.0004641589  1.00       26269.36403  0.1924903  1.362076e+04
##   0.0006812921  0.05       15202.39436  0.4568734  7.739529e+03
##   0.0006812921  0.10       27793.82374  0.4907443  1.434650e+04
##   0.0006812921  0.15       40492.69007  0.4840522  2.091724e+04
##   0.0006812921  0.20       53041.93118  0.4719306  2.748762e+04
##   0.0006812921  0.25       65406.46570  0.4618247  3.414555e+04
##   0.0006812921  0.30       77346.31015  0.4520317  4.064400e+04
##   0.0006812921  0.35       89037.49818  0.4330823  4.701525e+04
##   0.0006812921  0.40      100732.43530  0.4109373  5.338498e+04
##   0.0006812921  0.45      112425.25454  0.3898246  5.977154e+04
##   0.0006812921  0.50      123913.88433  0.3660525  6.604759e+04
##   0.0006812921  0.55      135399.08604  0.3363738  7.231555e+04
##   0.0006812921  0.60      146779.12135  0.3103539  7.853528e+04
##   0.0006812921  0.65      157219.60211  0.2863484  8.429266e+04
##   0.0006812921  0.70      167534.72350  0.2681377  8.994914e+04
##   0.0006812921  0.75      177708.94878  0.2519002  9.553771e+04
##   0.0006812921  0.80      187952.00754  0.2387073  1.011566e+05
##   0.0006812921  0.85      198291.83491  0.2257589  1.068013e+05
##   0.0006812921  0.90      208258.93692  0.2169298  1.122287e+05
##   0.0006812921  0.95      218021.33095  0.2126031  1.175453e+05
##   0.0006812921  1.00      227716.52056  0.2077198  1.228097e+05
##   0.0010000000  0.05         246.66872  0.4892400  1.181342e+02
##   0.0010000000  0.10         448.04571  0.5215124  2.155686e+02
##   0.0010000000  0.15         639.18278  0.5187544  3.081160e+02
##   0.0010000000  0.20         820.87656  0.5033338  3.960252e+02
##   0.0010000000  0.25        1002.59902  0.4858980  4.837712e+02
##   0.0010000000  0.30        1184.62117  0.4642999  5.717771e+02
##   0.0010000000  0.35        1367.06023  0.4436638  6.600518e+02
##   0.0010000000  0.40        1550.30411  0.4227287  7.487830e+02
##   0.0010000000  0.45        1732.29720  0.4059457  8.366983e+02
##   0.0010000000  0.50        1910.66306  0.3869240  9.219627e+02
##   0.0010000000  0.55        2089.07965  0.3649237  1.007230e+03
##   0.0010000000  0.60        2275.91882  0.3443069  1.097332e+03
##   0.0010000000  0.65        2459.91385  0.3216433  1.186626e+03
##   0.0010000000  0.70        2638.53644  0.2979140  1.271849e+03
##   0.0010000000  0.75        2812.47551  0.2804666  1.354641e+03
##   0.0010000000  0.80        2986.41973  0.2676600  1.437496e+03
##   0.0010000000  0.85        3159.74388  0.2515377  1.520850e+03
##   0.0010000000  0.90        3326.46211  0.2402413  1.601640e+03
##   0.0010000000  0.95        3492.39436  0.2315944  1.682008e+03
##   0.0010000000  1.00        3657.24133  0.2243417  1.759983e+03
##   0.0014677993  0.05         775.97009  0.3875490  5.212216e+02
##   0.0014677993  0.10        1662.65668  0.4141710  1.120952e+03
##   0.0014677993  0.15        2532.24969  0.4108568  1.700955e+03
##   0.0014677993  0.20        3365.47482  0.3981198  2.255316e+03
##   0.0014677993  0.25        4204.20528  0.3883312  2.814143e+03
##   0.0014677993  0.30        5051.88915  0.3811476  3.378064e+03
##   0.0014677993  0.35        5890.81313  0.3720068  3.935281e+03
##   0.0014677993  0.40        6726.29864  0.3639599  4.489978e+03
##   0.0014677993  0.45        7589.11924  0.3511131  5.061549e+03
##   0.0014677993  0.50        8454.65705  0.3386886  5.634972e+03
##   0.0014677993  0.55        9322.02428  0.3278018  6.209742e+03
##   0.0014677993  0.60       10189.93745  0.3120999  6.784621e+03
##   0.0014677993  0.65       11043.04686  0.2973018  7.349880e+03
##   0.0014677993  0.70       11899.28776  0.2824318  7.917763e+03
##   0.0014677993  0.75       12766.08153  0.2671000  8.494642e+03
##   0.0014677993  0.80       13644.61059  0.2555518  9.079919e+03
##   0.0014677993  0.85       14520.44665  0.2484996  9.663155e+03
##   0.0014677993  0.90       15348.19851  0.2410916  1.021362e+04
##   0.0014677993  0.95       16150.58069  0.2325383  1.074769e+04
##   0.0014677993  1.00       16947.29789  0.2249940  1.127704e+04
##   0.0021544347  0.05         205.24610  0.4456061  1.114135e+02
##   0.0021544347  0.10         392.19285  0.4850667  2.121032e+02
##   0.0021544347  0.15         577.27185  0.4897111  3.119985e+02
##   0.0021544347  0.20         758.69648  0.4789021  4.107415e+02
##   0.0021544347  0.25         940.45135  0.4667007  5.091743e+02
##   0.0021544347  0.30        1121.91770  0.4500824  6.073445e+02
##   0.0021544347  0.35        1302.60148  0.4373027  7.046563e+02
##   0.0021544347  0.40        1480.99046  0.4257033  8.021905e+02
##   0.0021544347  0.45        1658.48672  0.4073388  8.999217e+02
##   0.0021544347  0.50        1835.79788  0.3896755  9.974852e+02
##   0.0021544347  0.55        2012.91122  0.3770597  1.094838e+03
##   0.0021544347  0.60        2190.09589  0.3598401  1.192180e+03
##   0.0021544347  0.65        2367.31729  0.3425238  1.289501e+03
##   0.0021544347  0.70        2544.50407  0.3248600  1.386731e+03
##   0.0021544347  0.75        2721.64294  0.3093395  1.483941e+03
##   0.0021544347  0.80        2898.69976  0.2970171  1.581088e+03
##   0.0021544347  0.85        3075.58971  0.2884081  1.678141e+03
##   0.0021544347  0.90        3252.48584  0.2805309  1.775184e+03
##   0.0021544347  0.95        3429.26214  0.2719010  1.872118e+03
##   0.0021544347  1.00        3605.96702  0.2645014  1.968988e+03
##   0.0031622777  0.05         188.67302  0.4346400  1.196818e+02
##   0.0031622777  0.10         363.57651  0.4617011  2.311745e+02
##   0.0031622777  0.15         530.90825  0.4701175  3.378439e+02
##   0.0031622777  0.20         687.88264  0.4626100  4.346974e+02
##   0.0031622777  0.25         827.29123  0.4440860  5.208155e+02
##   0.0031622777  0.30         965.08596  0.4316220  6.061613e+02
##   0.0031622777  0.35        1102.67160  0.4213565  6.919498e+02
##   0.0031622777  0.40        1240.35469  0.4104015  7.799244e+02
##   0.0031622777  0.45        1378.82725  0.3932768  8.687357e+02
##   0.0031622777  0.50        1519.15270  0.3758102  9.581927e+02
##   0.0031622777  0.55        1659.91847  0.3572522  1.047278e+03
##   0.0031622777  0.60        1800.66386  0.3422289  1.136258e+03
##   0.0031622777  0.65        1941.43624  0.3295896  1.225193e+03
##   0.0031622777  0.70        2082.33812  0.3137405  1.314218e+03
##   0.0031622777  0.75        2223.24901  0.3008764  1.403217e+03
##   0.0031622777  0.80        2364.18677  0.2894144  1.492230e+03
##   0.0031622777  0.85        2505.15470  0.2794706  1.581231e+03
##   0.0031622777  0.90        2646.10641  0.2718022  1.670239e+03
##   0.0031622777  0.95        2787.05113  0.2664072  1.759234e+03
##   0.0031622777  1.00        2928.05132  0.2597736  1.848225e+03
##   0.0046415888  0.05         318.32099  0.4296604  2.344044e+02
##   0.0046415888  0.10         614.23175  0.4617003  4.784158e+02
##   0.0046415888  0.15         848.33140  0.4701866  6.750916e+02
##   0.0046415888  0.20        1094.86347  0.4662475  8.719617e+02
##   0.0046415888  0.25        1340.81841  0.4605304  1.064184e+03
##   0.0046415888  0.30        1584.53102  0.4478026  1.252077e+03
##   0.0046415888  0.35        1828.35093  0.4317517  1.441328e+03
##   0.0046415888  0.40        2072.80367  0.4161804  1.629844e+03
##   0.0046415888  0.45        2305.66048  0.4030888  1.811358e+03
##   0.0046415888  0.50        2539.32084  0.3858688  1.991711e+03
##   0.0046415888  0.55        2777.74596  0.3717939  2.169618e+03
##   0.0046415888  0.60        3014.40143  0.3594848  2.342953e+03
##   0.0046415888  0.65        3254.48506  0.3494057  2.520749e+03
##   0.0046415888  0.70        3497.74819  0.3399113  2.702955e+03
##   0.0046415888  0.75        3743.12265  0.3292750  2.886047e+03
##   0.0046415888  0.80        4004.19349  0.3181564  3.076278e+03
##   0.0046415888  0.85        4260.60871  0.3074471  3.262650e+03
##   0.0046415888  0.90        4495.76207  0.2985095  3.436852e+03
##   0.0046415888  0.95        4722.40695  0.2917455  3.606372e+03
##   0.0046415888  1.00        4953.74771  0.2846376  3.777918e+03
##   0.0068129207  0.05         603.13491  0.4827650  3.908896e+02
##   0.0068129207  0.10        1137.95832  0.5249078  7.377858e+02
##   0.0068129207  0.15        1674.12432  0.5322026  1.084942e+03
##   0.0068129207  0.20        2212.26423  0.5210346  1.431009e+03
##   0.0068129207  0.25        2750.45893  0.5136112  1.777070e+03
##   0.0068129207  0.30        3288.95376  0.4993016  2.123235e+03
##   0.0068129207  0.35        3827.63315  0.4821651  2.469499e+03
##   0.0068129207  0.40        4366.30629  0.4673629  2.815771e+03
##   0.0068129207  0.45        4904.97181  0.4556873  3.162020e+03
##   0.0068129207  0.50        5438.63882  0.4456865  3.505443e+03
##   0.0068129207  0.55        5963.50426  0.4319849  3.843796e+03
##   0.0068129207  0.60        6488.40950  0.4201895  4.182112e+03
##   0.0068129207  0.65        7013.30617  0.4102068  4.520453e+03
##   0.0068129207  0.70        7538.21752  0.4022256  4.858779e+03
##   0.0068129207  0.75        8063.11394  0.3949867  5.197046e+03
##   0.0068129207  0.80        8588.06942  0.3848236  5.535327e+03
##   0.0068129207  0.85        9113.03955  0.3761710  5.873632e+03
##   0.0068129207  0.90        9635.44732  0.3685180  6.210530e+03
##   0.0068129207  0.95       10140.10454  0.3614760  6.536466e+03
##   0.0068129207  1.00       10641.02425  0.3550964  6.859991e+03
##   0.0100000000  0.05          11.29834  0.4796733  8.076048e+00
##   0.0100000000  0.10          10.87626  0.5036502  7.780892e+00
##   0.0100000000  0.15          10.68853  0.5202723  7.903218e+00
##   0.0100000000  0.20          10.80087  0.5154918  8.089678e+00
##   0.0100000000  0.25          10.97174  0.5071605  8.203831e+00
##   0.0100000000  0.30          11.15696  0.4949747  8.434626e+00
##   0.0100000000  0.35          11.41231  0.4802925  8.640441e+00
##   0.0100000000  0.40          11.63302  0.4686331  8.850980e+00
##   0.0100000000  0.45          11.83618  0.4586729  9.032995e+00
##   0.0100000000  0.50          12.08237  0.4480545  9.235739e+00
##   0.0100000000  0.55          12.34545  0.4364731  9.427570e+00
##   0.0100000000  0.60          12.61046  0.4239076  9.640066e+00
##   0.0100000000  0.65          12.83792  0.4145138  9.864298e+00
##   0.0100000000  0.70          13.02992  0.4074144  1.006788e+01
##   0.0100000000  0.75          13.23825  0.3991008  1.023228e+01
##   0.0100000000  0.80          13.44113  0.3905248  1.040632e+01
##   0.0100000000  0.85          13.64563  0.3819838  1.057811e+01
##   0.0100000000  0.90          13.83363  0.3744453  1.073160e+01
##   0.0100000000  0.95          14.01363  0.3676065  1.087797e+01
##   0.0100000000  1.00          14.17464  0.3612613  1.100950e+01
##   0.0146779927  0.05          11.40594  0.4771448  8.271002e+00
##   0.0146779927  0.10          10.95729  0.4965565  7.791458e+00
##   0.0146779927  0.15          10.69917  0.5204905  7.856950e+00
##   0.0146779927  0.20          10.74037  0.5221544  8.029186e+00
##   0.0146779927  0.25          10.91493  0.5144603  8.158566e+00
##   0.0146779927  0.30          10.99702  0.5102279  8.225515e+00
##   0.0146779927  0.35          11.23636  0.4945261  8.452339e+00
##   0.0146779927  0.40          11.48136  0.4794289  8.661418e+00
##   0.0146779927  0.45          11.66890  0.4689460  8.823483e+00
##   0.0146779927  0.50          11.80194  0.4629783  8.911880e+00
##   0.0146779927  0.55          11.98273  0.4551318  9.073445e+00
##   0.0146779927  0.60          12.17358  0.4462614  9.271517e+00
##   0.0146779927  0.65          12.36920  0.4371468  9.463801e+00
##   0.0146779927  0.70          12.53216  0.4301381  9.628363e+00
##   0.0146779927  0.75          12.70427  0.4217997  9.800881e+00
##   0.0146779927  0.80          12.86940  0.4137668  9.953121e+00
##   0.0146779927  0.85          13.01690  0.4065135  1.006999e+01
##   0.0146779927  0.90          13.15913  0.3998447  1.020093e+01
##   0.0146779927  0.95          13.31056  0.3931108  1.033477e+01
##   0.0146779927  1.00          13.45090  0.3868010  1.046140e+01
##   0.0215443469  0.05          11.53570  0.4711752  8.474404e+00
##   0.0215443469  0.10          11.02967  0.4911782  7.768018e+00
##   0.0215443469  0.15          10.77500  0.5140922  7.818451e+00
##   0.0215443469  0.20          10.66846  0.5275783  7.926709e+00
##   0.0215443469  0.25          10.82474  0.5177926  8.107751e+00
##   0.0215443469  0.30          10.92077  0.5149211  8.198932e+00
##   0.0215443469  0.35          11.03158  0.5085305  8.312899e+00
##   0.0215443469  0.40          11.27002  0.4928566  8.490469e+00
##   0.0215443469  0.45          11.45692  0.4810452  8.647676e+00
##   0.0215443469  0.50          11.60504  0.4721592  8.778292e+00
##   0.0215443469  0.55          11.75377  0.4647739  8.903048e+00
##   0.0215443469  0.60          11.90192  0.4588195  9.049630e+00
##   0.0215443469  0.65          12.05233  0.4518967  9.204383e+00
##   0.0215443469  0.70          12.19845  0.4457200  9.345317e+00
##   0.0215443469  0.75          12.34000  0.4396185  9.485869e+00
##   0.0215443469  0.80          12.46937  0.4335937  9.600438e+00
##   0.0215443469  0.85          12.58611  0.4280684  9.704150e+00
##   0.0215443469  0.90          12.69986  0.4222588  9.798415e+00
##   0.0215443469  0.95          12.82048  0.4162308  9.901575e+00
##   0.0215443469  1.00          12.92701  0.4108419  9.998926e+00
##   0.0316227766  0.05          11.57860  0.4768285  8.568532e+00
##   0.0316227766  0.10          11.02619  0.4933007  7.710450e+00
##   0.0316227766  0.15          10.78639  0.5105980  7.756459e+00
##   0.0316227766  0.20          10.67788  0.5236193  7.857510e+00
##   0.0316227766  0.25          10.77487  0.5212057  8.032874e+00
##   0.0316227766  0.30          10.91919  0.5121925  8.146848e+00
##   0.0316227766  0.35          11.00861  0.5092312  8.255047e+00
##   0.0316227766  0.40          11.12213  0.5026624  8.364224e+00
##   0.0316227766  0.45          11.30673  0.4900217  8.507484e+00
##   0.0316227766  0.50          11.46235  0.4806370  8.624876e+00
##   0.0316227766  0.55          11.59829  0.4733022  8.741139e+00
##   0.0316227766  0.60          11.72230  0.4677667  8.844155e+00
##   0.0316227766  0.65          11.85604  0.4629895  8.992123e+00
##   0.0316227766  0.70          11.98829  0.4576117  9.141715e+00
##   0.0316227766  0.75          12.10576  0.4519980  9.262573e+00
##   0.0316227766  0.80          12.22263  0.4460353  9.377797e+00
##   0.0316227766  0.85          12.34614  0.4396814  9.495285e+00
##   0.0316227766  0.90          12.45809  0.4338052  9.590540e+00
##   0.0316227766  0.95          12.55731  0.4283673  9.670794e+00
##   0.0316227766  1.00          12.64413  0.4234744  9.745868e+00
##   0.0464158883  0.05          11.75045  0.4676929  8.774863e+00
##   0.0464158883  0.10          11.15701  0.4865227  7.784390e+00
##   0.0464158883  0.15          10.92231  0.5024384  7.755724e+00
##   0.0464158883  0.20          10.75540  0.5206385  7.841510e+00
##   0.0464158883  0.25          10.75649  0.5237036  7.967077e+00
##   0.0464158883  0.30          10.87256  0.5177596  8.112499e+00
##   0.0464158883  0.35          10.98034  0.5122208  8.216075e+00
##   0.0464158883  0.40          11.08009  0.5074762  8.303757e+00
##   0.0464158883  0.45          11.19232  0.5014066  8.390451e+00
##   0.0464158883  0.50          11.32431  0.4938889  8.485132e+00
##   0.0464158883  0.55          11.42710  0.4888996  8.567765e+00
##   0.0464158883  0.60          11.51728  0.4851493  8.651735e+00
##   0.0464158883  0.65          11.60452  0.4822867  8.745361e+00
##   0.0464158883  0.70          11.70284  0.4785988  8.855946e+00
##   0.0464158883  0.75          11.79221  0.4748525  8.962282e+00
##   0.0464158883  0.80          11.87249  0.4712862  9.053282e+00
##   0.0464158883  0.85          11.95418  0.4674740  9.135571e+00
##   0.0464158883  0.90          12.03289  0.4635807  9.214207e+00
##   0.0464158883  0.95          12.10071  0.4598085  9.275421e+00
##   0.0464158883  1.00          12.16115  0.4560467  9.326185e+00
##   0.0681292069  0.05          11.83764  0.4677199  8.902602e+00
##   0.0681292069  0.10          11.19073  0.4862580  7.835392e+00
##   0.0681292069  0.15          11.00591  0.4975823  7.720331e+00
##   0.0681292069  0.20          10.83825  0.5141903  7.808698e+00
##   0.0681292069  0.25          10.76865  0.5244606  7.906879e+00
##   0.0681292069  0.30          10.83362  0.5220736  8.038093e+00
##   0.0681292069  0.35          10.95142  0.5157257  8.162365e+00
##   0.0681292069  0.40          11.06430  0.5107080  8.266528e+00
##   0.0681292069  0.45          11.16608  0.5063722  8.353434e+00
##   0.0681292069  0.50          11.26208  0.5015397  8.426030e+00
##   0.0681292069  0.55          11.35680  0.4969997  8.494122e+00
##   0.0681292069  0.60          11.42652  0.4949126  8.566354e+00
##   0.0681292069  0.65          11.49245  0.4930455  8.630782e+00
##   0.0681292069  0.70          11.54532  0.4918683  8.713480e+00
##   0.0681292069  0.75          11.60353  0.4897735  8.787104e+00
##   0.0681292069  0.80          11.66567  0.4874048  8.853747e+00
##   0.0681292069  0.85          11.72960  0.4847556  8.919609e+00
##   0.0681292069  0.90          11.78494  0.4822123  8.976844e+00
##   0.0681292069  0.95          11.83245  0.4795878  9.025969e+00
##   0.0681292069  1.00          11.88503  0.4769072  9.069016e+00
##   0.1000000000  0.05          12.07598  0.4534109  9.207363e+00
##   0.1000000000  0.10          12.54107  0.4278885  8.990073e+00
##   0.1000000000  0.15          13.41247  0.4214700  9.575646e+00
##   0.1000000000  0.20          14.26380  0.4291633  1.042240e+01
##   0.1000000000  0.25          15.10993  0.4408117  1.124950e+01
##   0.1000000000  0.30          16.02829  0.4432588  1.209341e+01
##   0.1000000000  0.35          17.02230  0.4400193  1.292993e+01
##   0.1000000000  0.40          18.02820  0.4359747  1.376510e+01
##   0.1000000000  0.45          19.05102  0.4315621  1.458239e+01
##   0.1000000000  0.50          20.07065  0.4283229  1.540002e+01
##   0.1000000000  0.55          21.08059  0.4261541  1.621078e+01
##   0.1000000000  0.60          22.08132  0.4253826  1.701786e+01
##   0.1000000000  0.65          23.07332  0.4248939  1.781498e+01
##   0.1000000000  0.70          24.04011  0.4256316  1.859662e+01
##   0.1000000000  0.75          24.99168  0.4274110  1.937462e+01
##   0.1000000000  0.80          25.95704  0.4280401  2.014386e+01
##   0.1000000000  0.85          26.92573  0.4280844  2.092443e+01
##   0.1000000000  0.90          27.89232  0.4278613  2.170966e+01
##   0.1000000000  0.95          28.86043  0.4272247  2.248800e+01
##   0.1000000000  1.00          29.83380  0.4256524  2.326592e+01
## 
## Rsquared was used to select the optimal model using the largest value.
## The final values used for the model were fraction = 0.15 and lambda = 0.
# Resampling profile of the elastic net across its tuning grid.
plot(enetmodel)

# Score the held-out samples with the tuned elastic net.
predictions <- predict(enetmodel, newdata = xtest)

# Test-set root mean squared error
RMSE(pred = predictions, obs = ytest)
## [1] 11.19975
# Test-set R-squared
R2(pred = predictions, obs = ytest)
## [1] 0.5260718
# Candidate ridge penalties: 15 evenly spaced values from 0 to 0.1.
# `length.out` is spelled out instead of the partially matched `length`.
ridgeGrid <- data.frame(.lambda = seq(0, 0.1, length.out = 15))

# Seed set right before training so the resampling folds are reproducible.
set.seed(100)

# Ridge regression tuned over ridgeGrid, resampled with `ctrl` (the
# cross-validation control object created earlier in the file). Arguments are
# named in full (`x`, `y`, `preProcess`) rather than positional/partial.
ridgeModel <- train(
  x = xtrain, y = ytrain,
  method = "ridge", tuneGrid = ridgeGrid,
  trControl = ctrl,
  preProcess = c("center", "scale", "knnImpute")
)

ridgeModel
## Ridge Regression 
## 
## 133 samples
## 388 predictors
## 
## Pre-processing: centered (388), scaled (388), nearest neighbor imputation (388) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 120, 120, 119, 121, 120, 119, ... 
## Resampling results across tuning parameters:
## 
##   lambda       RMSE       Rsquared   MAE      
##   0.000000000   13.56846  0.3858837  10.092990
##   0.007142857   15.60215  0.2689935  11.569288
##   0.014285714  115.84292  0.3196042  82.969549
##   0.021428571   13.70226  0.3589300  10.306764
##   0.028571429   13.43111  0.3751570  10.118236
##   0.035714286   13.10860  0.3906398   9.825295
##   0.042857143   12.83212  0.4061188   9.623303
##   0.050000000   12.70364  0.4137372   9.503849
##   0.057142857   12.57114  0.4223368   9.407583
##   0.064285714   12.39352  0.4333250   9.282137
##   0.071428571   12.32787  0.4382032   9.225255
##   0.078571429   12.22797  0.4449271   9.148902
##   0.085714286   12.16429  0.4496660   9.092823
##   0.092857143   12.11078  0.4540054   9.042764
##   0.100000000   12.07131  0.4575539   9.008745
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was lambda = 0.1.
# Resampling profile of ridge regression across the lambda grid.
plot(ridgeModel)

# Score the held-out samples with the tuned ridge model.
predictions <- predict(ridgeModel, newdata = xtest)

# Test-set root mean squared error
RMSE(pred = predictions, obs = ytest)
## [1] 11.81689
# Test-set R-squared
R2(pred = predictions, obs = ytest)
## [1] 0.5174488

(f)

Based on the R2 values of the different regression methods, these models explain only about 50% of the variation in the data. A better approach would be to try other models that achieve higher R2 values and see whether they perform any better. With our limited knowledge of the problem, we do not know enough to reach a conclusion.

6.3

(a)

 # Load the ChemicalManufacturingProcess data set into the workspace.
 data(ChemicalManufacturingProcess)

(b)

The figures below show the missing values before and after data imputation.

Preprocessing steps: centering and scaling the data; KNN imputation to replace missing values; removing highly correlated predictors; removing near-zero-variance predictors.

# Missingness map of the raw data, before any imputation.
missmap(ChemicalManufacturingProcess, col = c("red", "blue"))

# Estimate the preprocessing recipe on every column except the first:
# center, scale, KNN imputation, correlation filter, near-zero-variance filter.
pdata <- preProcess(
  x = ChemicalManufacturingProcess[, -1],
  method = c("center", "scale", "knnImpute", "corr", "nzv")
)

# Apply the recipe to the same columns and re-draw the missingness map.
chemdata <- predict(
  object = pdata,
  newdata = ChemicalManufacturingProcess[, -1]
)
missmap(chemdata, col = c("red", "blue"))

### (c) Optimal value:

The lowest point of the cross-validated RMSE curve indicates that the optimal lambda is 0.007142857, with RMSE 1.844616 and R2 0.4517271.

# Reproducible 80/20 partition of the rows, based on Yield.
set.seed(100)
dp <- createDataPartition(
  y = ChemicalManufacturingProcess[["Yield"]],
  p = 0.8,
  list = FALSE
)

# Preprocessed predictors: rows in the partition vs. the remaining rows.
xtrain <- chemdata[dp, ]
xtest <- chemdata[-dp, ]

# Outcome (Yield) for the same two sets of rows.
ytrain <- ChemicalManufacturingProcess[["Yield"]][dp]
ytest <- ChemicalManufacturingProcess[["Yield"]][-dp]

# Candidate ridge penalties: 15 evenly spaced lambda values on [0, 0.1].
# `length.out` is written in full instead of relying on partial matching of
# `length`.
ridgeGrid <- data.frame(.lambda = seq(0, 0.1, length.out = 15))

# Tune ridge regression on the training rows, selecting lambda by
# cross-validated RMSE.
# NOTE(review): chemdata was already built with a preProcess() recipe that
# includes "center", "scale" and "knnImpute", so those steps are repeated
# here on the training rows -- confirm that this is intended.
ridge <- train(
  x = xtrain,
  y = ytrain,
  method = "ridge",
  metric = "RMSE",
  tuneGrid = ridgeGrid,
  trControl = trainControl(method = "cv"),
  preProcess = c("center", "scale", "knnImpute")
)

# Print the resampling results for every lambda in the grid.
ridge
## Ridge Regression 
## 
## 144 samples
##  56 predictor
## 
## Pre-processing: centered (56), scaled (56), nearest neighbor imputation (56) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 130, 130, 130, 130, 130, 129, ... 
## Resampling results across tuning parameters:
## 
##   lambda       RMSE      Rsquared   MAE     
##   0.000000000  2.465189  0.3529796  1.590211
##   0.007142857  1.844616  0.4517271  1.288468
##   0.014285714  1.965186  0.4633138  1.291750
##   0.021428571  2.026802  0.4738772  1.292381
##   0.028571429  2.062682  0.4808774  1.295481
##   0.035714286  2.084540  0.4855553  1.298670
##   0.042857143  2.097834  0.4887993  1.300154
##   0.050000000  2.105530  0.4911412  1.300773
##   0.057142857  2.109403  0.4928983  1.300648
##   0.064285714  2.110588  0.4942650  1.300083
##   0.071428571  2.109848  0.4953636  1.299394
##   0.078571429  2.107714  0.4962733  1.298385
##   0.085714286  2.104563  0.4970469  1.297062
##   0.092857143  2.100673  0.4977201  1.295515
##   0.100000000  2.096249  0.4983178  1.294485
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was lambda = 0.007142857.
# Resampled performance profile across the lambda grid.
plot(x = ridge)

# Tuning value selected by train().
ridge[["bestTune"]]

(d)

The test-set R2 is 0.17, compared with a cross-validated R2 of 0.45 on the training set, so the model might be overfitting to the training set. Test RMSE = 3.202444, test R2 = 0.1689975.

# Score the tuned ridge model on the held-out predictor rows.
predictions <- predict(object = ridge, newdata = xtest)
# Hold-out RMSE
RMSE(pred = predictions, obs = ytest)
## [1] 3.202444
# Hold-out R2
R2(pred = predictions, obs = ytest)
## [1] 0.1689975

(e)

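Based on the table below, ManufacturingProcess32 and ManufacturingProcess13 top the list. Overall, 11 of the 20 most important predictors are manufacturing process variables and 9 are biological material variables.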

# Variable importance for the ridge fit; the printed header shows that caret
# reports a loess r-squared importance score here.
varImp(object = ridge)
## loess r-squared variable importance
## 
##   only 20 most important variables shown (out of 56)
## 
##                        Overall
## ManufacturingProcess32  100.00
## ManufacturingProcess13   97.84
## BiologicalMaterial06     82.22
## ManufacturingProcess17   77.27
## BiologicalMaterial03     76.21
## ManufacturingProcess36   70.77
## BiologicalMaterial02     68.79
## ManufacturingProcess09   67.86
## BiologicalMaterial12     63.36
## ManufacturingProcess06   55.15
## BiologicalMaterial04     54.31
## ManufacturingProcess33   49.26
## ManufacturingProcess31   47.73
## ManufacturingProcess11   45.72
## BiologicalMaterial11     42.44
## BiologicalMaterial08     41.89
## ManufacturingProcess29   41.28
## BiologicalMaterial01     41.19
## BiologicalMaterial09     39.70
## ManufacturingProcess02   36.69
# Plot the variable importance scores for the ridge fit.
plot(x = varImp(object = ridge))