Problem 6.1

Part a

library(caret)
data(tecator)

# Convert the tecator matrices to tibbles.
#
# Both matrices are given explicit column names *before* conversion. Calling
# as_tibble() on a matrix without unique column names relies on a deprecated
# compatibility name repair (tibble >= 2.0.0) and emits a lifecycle warning.

# Predictors: name the absorbance columns V1, V2, ... These are the same names
# the compatibility repair generated, so downstream model terms are unchanged.
absorp_named <- absorp
colnames(absorp_named) <- paste0("V", seq_len(ncol(absorp_named)))
raw_data <- as_tibble(absorp_named)

# Responses: the three endpoint columns, named in the order used by the rest
# of this analysis.
endpoints_named <- endpoints
colnames(endpoints_named) <- c("water_pct", "fat_pct", "protein_pct")
response_raw <- as_tibble(endpoints_named)

# Combine predictors and responses column-wise into one modelling frame.
m1 <- cbind(raw_data, response_raw)

Part b

# Fix the RNG state before resampling so the run is reproducible.
set.seed(100)

# Resampling scheme: 10-fold cross-validation.
ctrl <- trainControl(method = "cv", number = 10)

# Robust linear model for water content, with the predictors replaced by
# their principal components during pre-processing.
rlmPCA <- train(
  raw_data,
  response_raw$water_pct,
  method = "rlm",
  preProcess = "pca",
  trControl = ctrl
)

# Print the resampling results for every tuning-parameter combination.
rlmPCA
## Robust Linear Model 
## 
## 215 samples
## 100 predictors
## 
## Pre-processing: principal component signal extraction (100), centered
##  (100), scaled (100) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 193, 194, 194, 193, 193, 194, ... 
## Resampling results across tuning parameters:
## 
##   intercept  psi           RMSE       Rsquared   MAE      
##   FALSE      psi.huber     63.798603  0.2875412  63.232121
##   FALSE      psi.hampel    63.798603  0.2875412  63.232121
##   FALSE      psi.bisquare  63.799249  0.2875042  63.232623
##    TRUE      psi.huber      8.562152  0.2892638   6.707973
##    TRUE      psi.hampel     8.505529  0.2880885   6.859076
##    TRUE      psi.bisquare   8.601691  0.2888414   6.657618
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were intercept = TRUE and psi = psi.hampel.

Part c

# Reproducible 70/30 train/test split of the combined modelling frame.
set.seed(1234)

sample_set <- sample(nrow(m1), round(nrow(m1) * 0.70), replace = FALSE)
train_set <- m1[sample_set, ]
test_set <- m1[-sample_set, ]

# Names of the three response columns; everything else is a predictor.
response_cols <- c("water_pct", "fat_pct", "protein_pct")

# select() is namespace-qualified everywhere: a bare `select` resolves to
# MASS::select when MASS is attached after dplyr, and MASS is needed for the
# rlm() fits later in this analysis.
train_predictors <- train_set %>%
  dplyr::select(-dplyr::all_of(response_cols))

# One training frame per response: all predictors plus that single response,
# so each model below can be written as `response ~ .`.
train_water_pct <- train_set %>%
  dplyr::select(-dplyr::all_of(setdiff(response_cols, "water_pct")))
train_fat_pct <- train_set %>%
  dplyr::select(-dplyr::all_of(setdiff(response_cols, "fat_pct")))
train_protein_pct <- train_set %>%
  dplyr::select(-dplyr::all_of(setdiff(response_cols, "protein_pct")))

# Held-out predictors used to score the fitted models.
test_predictors <- test_set %>%
  dplyr::select(-dplyr::all_of(response_cols))

## Ordinary Least Squares

# Fit one ordinary least squares model per response. Each training frame holds
# a single response column plus the predictor columns, so `~ .` regresses that
# response on every predictor.
ols_water_pct <- lm(water_pct ~ ., data=train_water_pct)
ols_fat_pct <- lm(fat_pct ~ ., data=train_fat_pct)
ols_protein_pct <- lm(protein_pct ~ ., data=train_protein_pct)

# Coefficient table and fit statistics for the water model.
summary(ols_water_pct)
## 
## Call:
## lm(formula = water_pct ~ ., data = train_water_pct)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.51720 -0.22950 -0.01484  0.28057  2.01623 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     72.94       2.27  32.133  < 2e-16 ***
## V1          -14126.01    4306.20  -3.280 0.001913 ** 
## V2           21572.27    7525.32   2.867 0.006099 ** 
## V3           -4114.54   10607.69  -0.388 0.699782    
## V4           -6391.76   19679.57  -0.325 0.746721    
## V5             682.13   26125.90   0.026 0.979276    
## V6           15662.76   23914.18   0.655 0.515559    
## V7          -10351.01   15373.97  -0.673 0.503931    
## V8            5590.94    8800.91   0.635 0.528209    
## V9           -7763.39    7411.57  -1.047 0.300023    
## V10         -12825.78   10462.96  -1.226 0.226123    
## V11          32480.82   13299.03   2.442 0.018246 *  
## V12         -72940.72   25580.17  -2.851 0.006353 ** 
## V13          94376.20   34092.12   2.768 0.007933 ** 
## V14         -66687.32   28814.71  -2.314 0.024883 *  
## V15          36885.99   18815.44   1.960 0.055646 .  
## V16          -7797.56   10756.79  -0.725 0.471963    
## V17           2807.19    7442.55   0.377 0.707667    
## V18         -12947.80    8226.56  -1.574 0.121945    
## V19         -14817.91   10997.27  -1.347 0.184044    
## V20          28589.74   20483.92   1.396 0.169093    
## V21          12146.59   28920.10   0.420 0.676318    
## V22         -37157.84   29882.39  -1.243 0.219611    
## V23          18381.06   25479.52   0.721 0.474088    
## V24           6963.18   19225.21   0.362 0.718767    
## V25         -11247.72   11993.03  -0.938 0.352919    
## V26          -2405.94    7869.23  -0.306 0.761097    
## V27          13852.13    8530.37   1.624 0.110822    
## V28         -41186.99   11839.55  -3.479 0.001067 ** 
## V29          72554.84   18334.39   3.957 0.000244 ***
## V30         -67604.99   24895.58  -2.716 0.009114 ** 
## V31          36495.98   29392.14   1.242 0.220261    
## V32          -7514.10   27987.02  -0.268 0.789453    
## V33          -1469.09   17930.99  -0.082 0.935036    
## V34          -3508.86   12582.67  -0.279 0.781522    
## V35          17777.12   10342.76   1.719 0.091964 .  
## V36         -13522.36    7286.12  -1.856 0.069486 .  
## V37          -2896.28    8338.25  -0.347 0.729817    
## V38          11459.25    9442.44   1.214 0.230722    
## V39         -22388.31   13511.56  -1.657 0.103915    
## V40          12555.83   18862.85   0.666 0.508764    
## V41          -6981.68   21704.78  -0.322 0.749074    
## V42          21110.83   23597.21   0.895 0.375357    
## V43         -18956.71   21689.00  -0.874 0.386369    
## V44          13751.41   15802.81   0.870 0.388441    
## V45         -18255.45   15032.32  -1.214 0.230410    
## V46          13173.88   10068.43   1.308 0.196831    
## V47          -3061.50    5178.83  -0.591 0.557133    
## V48          -5193.77    5826.80  -0.891 0.377092    
## V49          16275.24    9993.91   1.629 0.109829    
## V50          -4334.99   12793.35  -0.339 0.736172    
## V51         -36831.85   16532.44  -2.228 0.030513 *  
## V52          56945.79   22156.71   2.570 0.013257 *  
## V53         -30871.04   23727.89  -1.301 0.199328    
## V54          -6543.87   18961.65  -0.345 0.731488    
## V55          16300.51   13878.11   1.175 0.245852    
## V56          -2824.37    9374.04  -0.301 0.764463    
## V57          -3702.74    7004.87  -0.529 0.599472    
## V58          -2866.39    6069.43  -0.472 0.638833    
## V59          11778.33    5689.27   2.070 0.043718 *  
## V60         -15810.01    5210.30  -3.034 0.003850 ** 
## V61           8629.20    4353.34   1.982 0.053079 .  
## V62           1504.13    4928.01   0.305 0.761491    
## V63          -5745.43    6759.15  -0.850 0.399447    
## V64           1663.66    9607.84   0.173 0.863242    
## V65           6286.71   12518.61   0.502 0.617784    
## V66           -672.06   13378.17  -0.050 0.960139    
## V67         -20915.61   14372.67  -1.455 0.151981    
## V68          37027.59   16285.47   2.274 0.027405 *  
## V69         -27061.09   13518.13  -2.002 0.050854 .  
## V70          10812.22   10278.85   1.052 0.298010    
## V71          -3463.24    8482.15  -0.408 0.684835    
## V72           5759.88    7794.93   0.739 0.463477    
## V73         -10245.38    6946.98  -1.475 0.146664    
## V74          -3783.16    7446.90  -0.508 0.613720    
## V75          -5813.55    7271.80  -0.799 0.427880    
## V76          16067.42    7539.75   2.131 0.038129 *  
## V77         -12385.59    6385.10  -1.940 0.058176 .  
## V78          15584.37    7304.34   2.134 0.037909 *  
## V79          -3850.68    8698.88  -0.443 0.659957    
## V80          13336.82   10926.82   1.221 0.228095    
## V81         -13935.10   12329.02  -1.130 0.263866    
## V82           8355.12   12584.25   0.664 0.509845    
## V83          -2345.20   15233.42  -0.154 0.878281    
## V84           -990.95   17915.65  -0.055 0.956115    
## V85         -16179.00   19006.17  -0.851 0.398772    
## V86            766.28   20323.69   0.038 0.970077    
## V87           6334.61   21107.76   0.300 0.765364    
## V88           8291.55   19041.66   0.435 0.665153    
## V89         -13748.48   15431.32  -0.891 0.377312    
## V90          23472.07   17705.09   1.326 0.191079    
## V91         -30523.04   19298.19  -1.582 0.120163    
## V92           9211.59   18082.11   0.509 0.612737    
## V93           7626.65   15785.55   0.483 0.631148    
## V94          -4160.73   15535.91  -0.268 0.789967    
## V95          -6427.40   12329.87  -0.521 0.604515    
## V96          23107.84   11580.56   1.995 0.051575 .  
## V97         -18888.23   11405.05  -1.656 0.104087    
## V98          20843.81    9418.31   2.213 0.031578 *  
## V99         -19535.74    9671.97  -2.020 0.048887 *  
## V100          3715.75    4326.44   0.859 0.394607    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.027 on 49 degrees of freedom
## Multiple R-squared:  0.9965, Adjusted R-squared:  0.9894 
## F-statistic: 140.7 on 100 and 49 DF,  p-value: < 2.2e-16
# Coefficient table and fit statistics for the fat model.
summary(ols_fat_pct)
## 
## Call:
## lm(formula = fat_pct ~ ., data = train_fat_pct)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.13045 -0.33450 -0.00974  0.34267  2.13637 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  4.834e+00  2.841e+00   1.702  0.09516 . 
## V1           1.554e+04  5.389e+03   2.883  0.00583 **
## V2          -2.578e+04  9.418e+03  -2.738  0.00859 **
## V3           7.380e+03  1.327e+04   0.556  0.58076   
## V4           1.363e+04  2.463e+04   0.553  0.58253   
## V5          -1.136e+04  3.270e+04  -0.348  0.72968   
## V6          -1.452e+04  2.993e+04  -0.485  0.62968   
## V7           8.632e+03  1.924e+04   0.449  0.65565   
## V8          -8.607e+02  1.101e+04  -0.078  0.93803   
## V9           7.089e+03  9.275e+03   0.764  0.44836   
## V10          7.855e+03  1.309e+04   0.600  0.55136   
## V11         -2.830e+04  1.664e+04  -1.701  0.09537 . 
## V12          7.965e+04  3.201e+04   2.488  0.01629 * 
## V13         -1.050e+05  4.266e+04  -2.462  0.01739 * 
## V14          7.228e+04  3.606e+04   2.004  0.05056 . 
## V15         -4.002e+04  2.355e+04  -1.700  0.09551 . 
## V16          1.158e+04  1.346e+04   0.860  0.39373   
## V17         -3.375e+03  9.314e+03  -0.362  0.71861   
## V18          1.029e+04  1.030e+04   0.999  0.32268   
## V19          1.848e+04  1.376e+04   1.342  0.18564   
## V20         -3.254e+04  2.563e+04  -1.270  0.21025   
## V21         -1.519e+04  3.619e+04  -0.420  0.67655   
## V22          4.662e+04  3.740e+04   1.247  0.21847   
## V23         -2.087e+04  3.189e+04  -0.654  0.51588   
## V24         -1.470e+04  2.406e+04  -0.611  0.54408   
## V25          2.174e+04  1.501e+04   1.448  0.15387   
## V26         -1.458e+03  9.848e+03  -0.148  0.88291   
## V27         -1.914e+04  1.068e+04  -1.793  0.07914 . 
## V28          4.610e+04  1.482e+04   3.112  0.00310 **
## V29         -7.955e+04  2.294e+04  -3.467  0.00111 **
## V30          8.584e+04  3.116e+04   2.755  0.00821 **
## V31         -5.645e+04  3.678e+04  -1.535  0.13128   
## V32          1.777e+04  3.502e+04   0.507  0.61424   
## V33         -2.888e+03  2.244e+04  -0.129  0.89813   
## V34          9.301e+03  1.575e+04   0.591  0.55744   
## V35         -2.385e+04  1.294e+04  -1.842  0.07147 . 
## V36          1.322e+04  9.118e+03   1.450  0.15343   
## V37          6.596e+03  1.043e+04   0.632  0.53025   
## V38         -1.326e+04  1.182e+04  -1.122  0.26744   
## V39          2.280e+04  1.691e+04   1.349  0.18368   
## V40         -1.076e+04  2.361e+04  -0.456  0.65054   
## V41          5.653e+03  2.716e+04   0.208  0.83601   
## V42         -2.446e+04  2.953e+04  -0.828  0.41147   
## V43          2.353e+04  2.714e+04   0.867  0.39023   
## V44         -1.835e+04  1.978e+04  -0.928  0.35796   
## V45          2.565e+04  1.881e+04   1.364  0.17895   
## V46         -1.936e+04  1.260e+04  -1.536  0.13094   
## V47          7.214e+03  6.481e+03   1.113  0.27109   
## V48          8.194e+02  7.292e+03   0.112  0.91099   
## V49         -1.108e+04  1.251e+04  -0.886  0.38010   
## V50         -5.044e+02  1.601e+04  -0.032  0.97499   
## V51          3.982e+04  2.069e+04   1.925  0.06009 . 
## V52         -6.320e+04  2.773e+04  -2.279  0.02704 * 
## V53          3.837e+04  2.969e+04   1.292  0.20231   
## V54          5.108e+03  2.373e+04   0.215  0.83045   
## V55         -1.769e+04  1.737e+04  -1.019  0.31329   
## V56          2.556e+03  1.173e+04   0.218  0.82841   
## V57          3.737e+03  8.766e+03   0.426  0.67173   
## V58          2.298e+03  7.596e+03   0.303  0.76352   
## V59         -1.072e+04  7.120e+03  -1.506  0.13840   
## V60          1.852e+04  6.520e+03   2.840  0.00655 **
## V61         -1.330e+04  5.448e+03  -2.442  0.01825 * 
## V62          4.939e+03  6.167e+03   0.801  0.42710   
## V63          4.282e+03  8.459e+03   0.506  0.61493   
## V64         -7.284e+03  1.202e+04  -0.606  0.54744   
## V65         -5.329e+03  1.567e+04  -0.340  0.73518   
## V66          7.143e+03  1.674e+04   0.427  0.67149   
## V67          8.568e+03  1.799e+04   0.476  0.63594   
## V68         -2.201e+04  2.038e+04  -1.080  0.28544   
## V69          1.292e+04  1.692e+04   0.764  0.44870   
## V70          7.238e+02  1.286e+04   0.056  0.95536   
## V71         -3.565e+03  1.061e+04  -0.336  0.73840   
## V72         -4.926e+03  9.755e+03  -0.505  0.61582   
## V73          1.217e+04  8.694e+03   1.399  0.16799   
## V74          1.556e+03  9.319e+03   0.167  0.86812   
## V75          4.538e+03  9.100e+03   0.499  0.62027   
## V76         -1.418e+04  9.436e+03  -1.503  0.13932   
## V77          1.258e+04  7.991e+03   1.575  0.12177   
## V78         -1.495e+04  9.141e+03  -1.635  0.10840   
## V79          4.590e+03  1.089e+04   0.422  0.67512   
## V80         -1.035e+04  1.367e+04  -0.757  0.45288   
## V81          1.073e+04  1.543e+04   0.696  0.48988   
## V82         -4.535e+03  1.575e+04  -0.288  0.77461   
## V83         -3.900e+02  1.906e+04  -0.020  0.98376   
## V84         -5.459e+03  2.242e+04  -0.243  0.80865   
## V85          2.733e+04  2.379e+04   1.149  0.25610   
## V86         -4.775e+03  2.543e+04  -0.188  0.85186   
## V87         -1.930e+04  2.642e+04  -0.730  0.46859   
## V88          1.001e+04  2.383e+04   0.420  0.67627   
## V89          7.872e+03  1.931e+04   0.408  0.68533   
## V90         -2.882e+04  2.216e+04  -1.301  0.19948   
## V91          2.727e+04  2.415e+04   1.129  0.26441   
## V92          2.244e+03  2.263e+04   0.099  0.92142   
## V93         -1.642e+04  1.975e+04  -0.831  0.40979   
## V94          1.592e+04  1.944e+04   0.819  0.41684   
## V95         -6.812e+03  1.543e+04  -0.442  0.66079   
## V96         -1.147e+04  1.449e+04  -0.791  0.43269   
## V97          1.667e+04  1.427e+04   1.168  0.24841   
## V98         -2.824e+04  1.179e+04  -2.396  0.02046 * 
## V99          2.222e+04  1.210e+04   1.836  0.07245 . 
## V100        -2.550e+03  5.414e+03  -0.471  0.63981   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.286 on 49 degrees of freedom
## Multiple R-squared:  0.9968, Adjusted R-squared:  0.9901 
## F-statistic: 150.4 on 100 and 49 DF,  p-value: < 2.2e-16
# Coefficient table and fit statistics for the protein model.
summary(ols_protein_pct)
## 
## Call:
## lm(formula = protein_pct ~ ., data = train_protein_pct)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41374 -0.12675 -0.02425  0.08987  0.50477 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.033e+01  7.245e-01  28.065  < 2e-16 ***
## V1          -4.602e+03  1.374e+03  -3.349 0.001568 ** 
## V2           7.438e+03  2.402e+03   3.097 0.003232 ** 
## V3           1.609e+03  3.385e+03   0.475 0.636707    
## V4          -1.364e+04  6.281e+03  -2.171 0.034779 *  
## V5           1.600e+04  8.338e+03   1.919 0.060824 .  
## V6          -5.690e+03  7.632e+03  -0.746 0.459505    
## V7           3.210e+02  4.907e+03   0.065 0.948109    
## V8          -2.307e+03  2.809e+03  -0.821 0.415531    
## V9          -2.588e+03  2.365e+03  -1.094 0.279336    
## V10          7.192e+03  3.339e+03   2.154 0.036216 *  
## V11         -3.461e+02  4.244e+03  -0.082 0.935340    
## V12         -1.986e+04  8.164e+03  -2.433 0.018663 *  
## V13          3.565e+04  1.088e+04   3.276 0.001935 ** 
## V14         -2.849e+04  9.196e+03  -3.098 0.003218 ** 
## V15          1.716e+04  6.005e+03   2.858 0.006249 ** 
## V16         -9.428e+03  3.433e+03  -2.746 0.008408 ** 
## V17          2.486e+03  2.375e+03   1.047 0.300330    
## V18         -3.517e+03  2.626e+03  -1.340 0.186567    
## V19         -5.032e+03  3.510e+03  -1.434 0.157982    
## V20          9.382e+03  6.538e+03   1.435 0.157590    
## V21          3.569e+03  9.230e+03   0.387 0.700635    
## V22         -1.126e+04  9.537e+03  -1.181 0.243495    
## V23          6.673e+03  8.132e+03   0.821 0.415831    
## V24          6.186e+02  6.136e+03   0.101 0.920111    
## V25         -3.107e+03  3.828e+03  -0.812 0.420881    
## V26          7.881e+02  2.511e+03   0.314 0.755001    
## V27          2.637e+03  2.723e+03   0.968 0.337557    
## V28         -1.102e+04  3.779e+03  -2.916 0.005338 ** 
## V29          2.418e+04  5.851e+03   4.132 0.000140 ***
## V30         -2.385e+04  7.946e+03  -3.001 0.004219 ** 
## V31          1.438e+04  9.381e+03   1.533 0.131676    
## V32         -6.165e+03  8.932e+03  -0.690 0.493328    
## V33          3.257e+03  5.723e+03   0.569 0.571872    
## V34         -5.210e+03  4.016e+03  -1.297 0.200585    
## V35          5.140e+03  3.301e+03   1.557 0.125893    
## V36         -8.485e+02  2.325e+03  -0.365 0.716769    
## V37         -4.649e+03  2.661e+03  -1.747 0.086899 .  
## V38          3.616e+03  3.014e+03   1.200 0.235890    
## V39          2.401e+03  4.312e+03   0.557 0.580240    
## V40         -7.018e+03  6.020e+03  -1.166 0.249353    
## V41          5.901e+02  6.927e+03   0.085 0.932458    
## V42          8.864e+03  7.531e+03   1.177 0.244880    
## V43         -1.474e+03  6.922e+03  -0.213 0.832301    
## V44         -1.031e+04  5.044e+03  -2.044 0.046316 *  
## V45          8.201e+03  4.798e+03   1.709 0.093711 .  
## V46         -1.481e+02  3.213e+03  -0.046 0.963437    
## V47         -2.224e+03  1.653e+03  -1.346 0.184637    
## V48         -2.453e+03  1.860e+03  -1.319 0.193316    
## V49          5.768e+03  3.190e+03   1.808 0.076669 .  
## V50         -4.037e+01  4.083e+03  -0.010 0.992150    
## V51         -1.080e+04  5.276e+03  -2.047 0.046049 *  
## V52          1.794e+04  7.071e+03   2.537 0.014406 *  
## V53         -1.516e+04  7.573e+03  -2.002 0.050882 .  
## V54          5.612e+03  6.052e+03   0.927 0.358275    
## V55          3.247e+02  4.429e+03   0.073 0.941867    
## V56         -1.860e+02  2.992e+03  -0.062 0.950689    
## V57         -1.015e+02  2.236e+03  -0.045 0.963990    
## V58         -1.162e+03  1.937e+03  -0.600 0.551439    
## V59          1.707e+03  1.816e+03   0.940 0.351754    
## V60         -4.068e+03  1.663e+03  -2.446 0.018073 *  
## V61          5.058e+03  1.389e+03   3.640 0.000655 ***
## V62         -1.535e+03  1.573e+03  -0.976 0.333724    
## V63         -5.797e+03  2.157e+03  -2.687 0.009816 ** 
## V64          1.291e+04  3.066e+03   4.209 0.000109 ***
## V65         -8.561e+03  3.995e+03  -2.143 0.037125 *  
## V66          2.099e+03  4.270e+03   0.492 0.625211    
## V67         -5.157e+03  4.587e+03  -1.124 0.266430    
## V68          1.254e+04  5.198e+03   2.413 0.019627 *  
## V69         -1.094e+04  4.314e+03  -2.536 0.014437 *  
## V70          2.699e+03  3.281e+03   0.823 0.414714    
## V71          4.588e+02  2.707e+03   0.169 0.866119    
## V72         -1.382e+03  2.488e+03  -0.555 0.581082    
## V73         -1.555e+03  2.217e+03  -0.701 0.486524    
## V74          6.233e+03  2.377e+03   2.623 0.011592 *  
## V75         -8.237e+03  2.321e+03  -3.549 0.000864 ***
## V76          1.871e+03  2.406e+03   0.778 0.440485    
## V77          2.678e+03  2.038e+03   1.314 0.194976    
## V78          1.400e+03  2.331e+03   0.601 0.550813    
## V79         -1.534e+03  2.776e+03  -0.553 0.583048    
## V80          2.944e+03  3.487e+03   0.844 0.402585    
## V81         -2.657e+03  3.935e+03  -0.675 0.502773    
## V82          1.507e+03  4.016e+03   0.375 0.709149    
## V83         -1.177e+04  4.862e+03  -2.421 0.019219 *  
## V84          2.250e+04  5.718e+03   3.936 0.000262 ***
## V85         -2.002e+04  6.066e+03  -3.300 0.001808 ** 
## V86          7.967e+03  6.486e+03   1.228 0.225238    
## V87         -8.966e+02  6.737e+03  -0.133 0.894666    
## V88          4.248e+02  6.077e+03   0.070 0.944555    
## V89         -5.318e+03  4.925e+03  -1.080 0.285543    
## V90          1.362e+04  5.651e+03   2.411 0.019713 *  
## V91         -7.887e+03  6.159e+03  -1.280 0.206410    
## V92         -3.151e+03  5.771e+03  -0.546 0.587510    
## V93          1.310e+03  5.038e+03   0.260 0.795898    
## V94          2.882e+03  4.958e+03   0.581 0.563686    
## V95         -9.439e+02  3.935e+03  -0.240 0.811427    
## V96         -2.727e+03  3.696e+03  -0.738 0.464106    
## V97          1.620e+03  3.640e+03   0.445 0.658288    
## V98          6.960e+03  3.006e+03   2.315 0.024820 *  
## V99         -9.354e+03  3.087e+03  -3.030 0.003895 ** 
## V100         2.983e+03  1.381e+03   2.160 0.035663 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3279 on 49 degrees of freedom
## Multiple R-squared:  0.9962, Adjusted R-squared:  0.9885 
## F-statistic: 129.3 on 100 and 49 DF,  p-value: < 2.2e-16
# Score the held-out predictors with each OLS model and pair the predictions
# with the observed test responses. The `pred`/`obs` column names are the ones
# defaultSummary() expects.

# Water
predict_water_pct <- predict(ols_water_pct, newdata = test_predictors)
water_pct_df <- data.frame(
  pred = predict_water_pct,
  obs = test_set$water_pct
)

# Fat
predict_fat_pct <- predict(ols_fat_pct, newdata = test_predictors)
fat_pct_df <- data.frame(
  pred = predict_fat_pct,
  obs = test_set$fat_pct
)

# Protein
predict_protein_pct <- predict(ols_protein_pct, newdata = test_predictors)
protein_pct_df <- data.frame(
  pred = predict_protein_pct,
  obs = test_set$protein_pct
)

# Test-set performance (RMSE, R-squared, MAE) of each OLS model, computed from
# the pred/obs frames built above.
# NOTE(review): every test RMSE below is several times the residual standard
# error reported by the matching summary() on the training data (water 5.58 vs
# 1.027, fat 5.87 vs 1.286, protein 2.53 vs 0.3279). That gap looks like
# overfitting from fitting 100 predictors on 150 training rows -- confirm.
defaultSummary(water_pct_df)
##      RMSE  Rsquared       MAE 
## 5.5817925 0.8038929 2.4307695
defaultSummary(fat_pct_df)
##      RMSE  Rsquared       MAE 
## 5.8725434 0.8397015 2.8190720
defaultSummary(protein_pct_df)
##      RMSE  Rsquared       MAE 
## 2.5283355 0.6393083 1.2180471
## Robust Linear Regression

# Fit one robust linear model per response on the same training frames used
# for the OLS fits.
# NOTE(review): rlm() is provided by MASS, and no library(MASS) call is visible
# in this file -- confirm it is attached before this point.
# NOTE(review): all three fits stop at the 20-step iteration limit without
# converging (see the warnings below), so the summaries that follow describe
# non-converged estimates. Consider raising `maxit` or reducing the number of
# predictors before interpreting them.
rlm_water_pct <- rlm(water_pct ~ ., data=train_water_pct)
## Warning in rlm.default(x, y, weights, method = method, wt.method = wt.method, :
## 'rlm' failed to converge in 20 steps
rlm_fat_pct <- rlm(fat_pct ~ ., data=train_fat_pct)
## Warning in rlm.default(x, y, weights, method = method, wt.method = wt.method, :
## 'rlm' failed to converge in 20 steps
rlm_protein_pct <- rlm(protein_pct ~ ., data=train_protein_pct)
## Warning in rlm.default(x, y, weights, method = method, wt.method = wt.method, :
## 'rlm' failed to converge in 20 steps
# Coefficient table for the robust water model.
summary(rlm_water_pct)
## 
## Call: rlm(formula = water_pct ~ ., data = train_water_pct)
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -3.774939 -0.079509 -0.001079  0.086589  6.123185 
## 
## Coefficients:
##             Value       Std. Error  t value    
## (Intercept)     75.6648      0.7140    105.9689
## V1          -16651.5107   1354.5261    -12.2932
## V2           19756.9026   2367.1049      8.3464
## V3            -726.7800   3336.6709     -0.2178
## V4            4904.1431   6190.2499      0.7922
## V5          -12431.4383   8217.9575     -1.5127
## V6           20758.9998   7522.2560      2.7597
## V7           -3690.2476   4835.9137     -0.7631
## V8           -2134.6793   2768.3455     -0.7711
## V9          -16673.2031   2331.3251     -7.1518
## V10         -11454.8300   3291.1455     -3.4805
## V11          42967.3078   4183.2375     10.2713
## V12         -88001.1266   8046.2961    -10.9368
## V13         107145.8752  10723.7506      9.9915
## V14         -59972.1323   9063.7284     -6.6167
## V15          25594.6041   5918.4384      4.3246
## V16          -3845.7567   3383.5721     -1.1366
## V17            729.4902   2341.0689      0.3116
## V18          -9512.3917   2587.6804     -3.6760
## V19         -24528.2533   3459.2137     -7.0907
## V20          35082.0208   6443.2621      5.4448
## V21          22744.1395   9096.8787      2.5002
## V22         -58520.6402   9399.5685     -6.2259
## V23          36849.2967   8014.6372      4.5977
## V24            353.4321   6047.3315      0.0584
## V25         -14509.8651   3772.4321     -3.8463
## V26          -1884.7257   2475.2827     -0.7614
## V27          18608.3643   2683.2474      6.9350
## V28         -41468.0331   3724.1545    -11.1349
## V29          67040.7343   5767.1212     11.6246
## V30         -57496.9019   7830.9575     -7.3423
## V31          25448.6859   9245.3612      2.7526
## V32          -6115.0199   8803.3754     -0.6946
## V33           7509.9638   5640.2307      1.3315
## V34         -10184.6700   3957.9050     -2.5732
## V35          15082.1291   3253.3363      4.6359
## V36         -12130.1950   2291.8645     -5.2927
## V37          -1071.2293   2622.8134     -0.4084
## V38          13652.1068   2970.1397      4.5965
## V39         -30746.2341   4250.0906     -7.2343
## V40          24637.4638   5933.3502      4.1524
## V41         -12273.1120   6827.2853     -1.7977
## V42          20017.0059   7422.5527      2.6968
## V43         -23416.7860   6822.3226     -3.4324
## V44          25951.7895   4970.8071      5.2208
## V45         -29706.1059   4728.4470     -6.2824
## V46          15626.1195   3167.0471      4.9340
## V47           -676.3698   1629.0133     -0.4152
## V48          -4362.0040   1832.8316     -2.3799
## V49          16733.9165   3143.6071      5.3232
## V50         -13288.9255   4024.1753     -3.3023
## V51         -23401.9816   5200.3155     -4.5001
## V52          50249.3785   6969.4389      7.2100
## V53         -30881.5939   7463.6590     -4.1376
## V54         -11813.8473   5964.4263     -1.9807
## V55          28093.8811   4365.3879      6.4356
## V56         -10756.7072   2948.6233     -3.6480
## V57          -2562.2507   2203.3976     -1.1629
## V58           -629.0949   1909.1529     -0.3295
## V59           5963.1517   1789.5718      3.3322
## V60          -8619.6660   1638.9094     -5.2594
## V61           3905.7132   1369.3534      2.8522
## V62           2421.5447   1550.1155      1.5622
## V63           2856.0685   2126.1034      1.3433
## V64         -11180.5338   3022.1654     -3.6995
## V65          13796.1819   3937.7543      3.5036
## V66          -8032.5367   4208.1306     -1.9088
## V67         -10653.3489   4520.9534     -2.3564
## V68          25665.8333   5122.6286      5.0103
## V69         -20031.0515   4252.1558     -4.7108
## V70           7389.0990   3233.2330      2.2854
## V71            129.7586   2668.0797      0.0486
## V72            807.4423   2451.9102      0.3293
## V73          -3223.3206   2185.1885     -1.4751
## V74         -12915.3837   2342.4388     -5.5136
## V75           3920.3769   2287.3608      1.7139
## V76          16147.6912   2371.6436      6.8086
## V77         -12840.5349   2008.4477     -6.3933
## V78          10918.3146   2297.5941      4.7521
## V79            966.8102   2736.2514      0.3533
## V80           6950.4730   3437.0558      2.0222
## V81         -16038.6069   3878.1207     -4.1357
## V82          13602.9420   3958.4040      3.4365
## V83          -1430.9818   4791.7061     -0.2986
## V84           3034.1630   5635.4056      0.5384
## V85         -34168.4026   5978.4310     -5.7153
## V86          19795.1822   6392.8584      3.0965
## V87           2893.4291   6639.4922      0.4358
## V88          -1680.0729   5989.5961     -0.2805
## V89          -3918.0770   4853.9541     -0.8072
## V90          27421.4799   5569.1745      4.9238
## V91         -48216.9897   6070.2866     -7.9431
## V92          24232.9683   5687.7668      4.2605
## V93           4757.1261   4965.3790      0.9581
## V94          -3029.1162   4886.8525     -0.6199
## V95         -17244.1026   3878.3865     -4.4462
## V96          35567.9132   3642.6885      9.7642
## V97         -22523.9209   3587.4830     -6.2785
## V98          16070.5547   2962.5490      5.4246
## V99         -10297.0300   3042.3401     -3.3846
## V100         -1194.0365   1360.8910     -0.8774
## 
## Residual standard error: 0.1295 on 49 degrees of freedom
# Coefficient table for the robust fat model.
summary(rlm_fat_pct)
## 
## Call: rlm(formula = fat_pct ~ ., data = train_fat_pct)
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -5.488239 -0.098107  0.003275  0.085600  3.900130 
## 
## Coefficients:
##             Value       Std. Error  t value    
## (Intercept)      2.7564      0.7938      3.4723
## V1           15203.6026   1505.9377     10.0958
## V2          -20735.7829   2631.7045     -7.8792
## V3            6728.7757   3709.6504      1.8139
## V4          -13276.6783   6882.2081     -1.9291
## V5           30788.4999   9136.5767      3.3698
## V6          -42734.4216   8363.1083     -5.1099
## V7           13052.0725   5376.4815      2.4276
## V8            1457.8086   3077.7965      0.4737
## V9           19554.5409   2591.9252      7.5444
## V10            899.0792   3659.0361      0.2457
## V11         -35881.7235   4650.8479     -7.7151
## V12          80985.6648   8945.7267      9.0530
## V13         -85437.9752  11922.4723     -7.1661
## V14          36065.9392  10076.8895      3.5791
## V15         -10001.0450   6580.0130     -1.5199
## V16          -1833.2999   3761.7944     -0.4873
## V17            963.5837   2602.7581      0.3702
## V18           6820.9910   2876.9364      2.3709
## V19          28210.6242   3845.8913      7.3353
## V20         -55889.6336   7163.5024     -7.8020
## V21          10163.3575  10113.7454      1.0049
## V22          36869.0143  10450.2705      3.5280
## V23         -18419.2292   8910.5288     -2.0671
## V24         -26544.5633   6723.3139     -3.9481
## V25          34845.6683   4194.1219      8.3082
## V26          -3872.4570   2751.9746     -1.4072
## V27         -15056.1707   2983.1859     -5.0470
## V28          32484.3155   4140.4477      7.8456
## V29         -65592.0433   6411.7812    -10.2299
## V30          77470.8564   8706.3171      8.8982
## V31         -63380.0748  10278.8256     -6.1661
## V32          43129.3115   9787.4338      4.4066
## V33         -26615.1388   6270.7066     -4.2444
## V34          15818.6065   4400.3273      3.5949
## V35         -18631.7301   3617.0006     -5.1512
## V36          15630.2568   2548.0536      6.1342
## V37          -6808.5657   2915.9966     -2.3349
## V38          -6496.4275   3302.1477     -1.9673
## V39          27915.9003   4725.1740      5.9079
## V40         -17197.0519   6596.5915     -2.6070
## V41             19.5110   7590.4525      0.0026
## V42          -7103.8709   8252.2600     -0.8608
## V43          12969.0095   7584.9350      1.7098
## V44         -24986.0932   5526.4536     -4.5212
## V45          37176.7876   5257.0020      7.0719
## V46         -20336.5264   3521.0658     -5.7757
## V47            827.0865   1811.1076      0.4567
## V48           3419.6397   2037.7091      1.6782
## V49          -8705.3660   3495.0056     -2.4908
## V50          -2142.2212   4474.0054     -0.4788
## V51          33706.4861   5781.6168      5.8299
## V52         -47837.2928   7748.4963     -6.1738
## V53          19056.8363   8297.9613      2.2966
## V54          25433.1891   6631.1415      3.8354
## V55         -35400.5797   4853.3595     -7.2940
## V56          10736.4548   3278.2261      3.2751
## V57           4478.0854   2449.6977      1.8280
## V58          -1063.9639   2122.5617     -0.5013
## V59          -6958.1297   1989.6136     -3.4972
## V60          15987.1013   1822.1099      8.7739
## V61         -12964.1987   1522.4224     -8.5155
## V62           3880.4975   1723.3904      2.2517
## V63            -54.1518   2363.7634     -0.0229
## V64           5094.9229   3359.9889      1.5164
## V65         -20817.7327   4377.9241     -4.7552
## V66          24897.5140   4678.5236      5.3217
## V67          -5261.3470   5026.3143     -1.0468
## V68         -11356.0222   5695.2459     -1.9939
## V69          10494.5444   4727.4700      2.2199
## V70          -4159.1651   3594.6500     -1.1570
## V71          -6530.9047   2966.3228     -2.2017
## V72           2753.2320   2725.9895      1.0100
## V73           5386.8897   2429.4531      2.2173
## V74          10296.0721   2604.2812      3.9535
## V75          -4862.4451   2543.0464     -1.9121
## V76         -14517.4743   2636.7505     -5.5058
## V77          12811.2136   2232.9559      5.7373
## V78          -6400.9430   2554.4236     -2.5058
## V79          -2992.1594   3042.1149     -0.9836
## V80          -3203.9074   3821.2566     -0.8384
## V81          11934.0844   4311.6246      2.7679
## V82         -12155.7105   4400.8821     -2.7621
## V83          -1511.3247   5327.3323     -0.2837
## V84          -1539.0641   6265.3422     -0.2456
## V85          29000.7190   6646.7116      4.3632
## V86          -3026.2618   7107.4645     -0.4258
## V87         -28669.0795   7381.6675     -3.8838
## V88          18057.3838   6659.1248      2.7117
## V89           4312.9308   5396.5384      0.7992
## V90         -27267.7762   6191.7076     -4.4039
## V91          27508.5439   6748.8350      4.0760
## V92           3363.2831   6323.5564      0.5319
## V93         -22382.7984   5520.4187     -4.0545
## V94          17700.6674   5433.1144      3.2579
## V95           4866.7088   4311.9201      1.1287
## V96         -21861.9836   4049.8752     -5.3982
## V97          17159.9242   3988.4988      4.3024
## V98         -26122.5240   3293.7084     -7.9310
## V99          16755.2238   3382.4187      4.9536
## V100          1458.1694   1513.0140      0.9638
## 
## Residual standard error: 0.145 on 49 degrees of freedom
# Print the robust linear model fitted for protein_pct: residual quantiles,
# the per-predictor coefficient table and the residual standard error.
summary(rlm_protein_pct)
## 
## Call: rlm(formula = protein_pct ~ ., data = train_protein_pct)
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.6527513 -0.0194684  0.0002333  0.0187142  0.9909981 
## 
## Coefficients:
##             Value       Std. Error  t value    
## (Intercept)     19.9637      0.1756    113.6592
## V1           -3572.2509    333.2018    -10.7210
## V2            6377.5864    582.2875     10.9526
## V3            4308.6788    820.7924      5.2494
## V4          -19430.6855   1522.7484    -12.7603
## V5           19499.4585   2021.5471      9.6458
## V6           -7424.4566   1850.4105     -4.0123
## V7             455.8161   1189.5933      0.3832
## V8            -609.9403    680.9893     -0.8957
## V9           -3961.2217    573.4860     -6.9073
## V10          10268.0924    809.5936     12.6830
## V11          -4043.6133   1029.0406     -3.9295
## V12         -15639.9764   1979.3199     -7.9017
## V13          29781.4118   2637.9508     11.2896
## V14         -21959.1050   2229.5995     -9.8489
## V15          13481.8603   1455.8851      9.2602
## V16          -9706.1418    832.3297    -11.6614
## V17           3830.8935    575.8829      6.6522
## V18          -3835.5255    636.5472     -6.0255
## V19          -7831.0838    850.9369     -9.2029
## V20          12170.1981   1584.9873      7.6784
## V21           3100.6753   2237.7542      1.3856
## V22         -11173.8829   2312.2133     -4.8325
## V23           5584.5095   1971.5321      2.8326
## V24           2531.6748   1487.5917      1.7019
## V25          -2628.5651    927.9860     -2.8325
## V26          -1725.4393    608.8983     -2.8337
## V27           3112.1588    660.0559      4.7150
## V28          -9890.3379    916.1101    -10.7960
## V29          23059.1968   1418.6624     16.2542
## V30         -22657.6384   1926.3484    -11.7620
## V31          15318.3099   2274.2796      6.7355
## V32          -7869.8492   2165.5549     -3.6341
## V33           2657.0031   1387.4484      1.9150
## V34          -3053.3140    973.6107     -3.1361
## V35           2569.6212    800.2929      3.2109
## V36           1471.1660    563.7790      2.6095
## V37          -6834.0072    645.1896    -10.5922
## V38           4828.2375    730.6289      6.6083
## V39           3409.0156   1045.4859      3.2607
## V40          -8020.5628   1459.5533     -5.4952
## V41          -2957.1634   1679.4537     -1.7608
## V42          16874.9460   1825.8843      9.2421
## V43          -8324.0999   1678.2329     -4.9600
## V44          -8264.5309   1222.7760     -6.7588
## V45           9190.5033   1163.1574      7.9013
## V46           -879.2156    779.0665     -1.1286
## V47          -2414.6639    400.7233     -6.0258
## V48          -2197.4363    450.8609     -4.8739
## V49           7126.0130    773.3004      9.2151
## V50          -3486.1909    989.9126     -3.5217
## V51          -7819.6328   1279.2330     -6.1128
## V52          16553.1819   1714.4222      9.6553
## V53         -14375.6972   1835.9962     -7.8299
## V54           5246.9341   1467.1978      3.5762
## V55              8.4113   1073.8480      0.0078
## V56            379.2141    725.3361      0.5228
## V57           -349.0652    542.0169     -0.6440
## V58          -1169.4001    469.6353     -2.4900
## V59           1708.6936    440.2193      3.8815
## V60          -4149.8248    403.1577    -10.2933
## V61           4977.9459    336.8492     14.7780
## V62          -1190.2825    381.3151     -3.1215
## V63          -6131.3943    523.0032    -11.7234
## V64          12931.0782    743.4268     17.3939
## V65          -6409.6773    968.6538     -6.6171
## V66          -1073.8322   1035.1641     -1.0374
## V67          -4361.0838   1112.1158     -3.9214
## V68          14452.7167   1260.1227     11.4693
## V69         -14114.4518   1045.9939    -13.4938
## V70           4071.6629    795.3476      5.1194
## V71           1217.9592    656.3247      1.8557
## V72          -1891.0509    603.1489     -3.1353
## V73          -2753.8079    537.5376     -5.1230
## V74           8342.2325    576.2199     14.4775
## V75         -10054.3615    562.6712    -17.8690
## V76           1931.6477    583.4040      3.3110
## V77           5012.9223    494.0609     10.1464
## V78           -100.4332    565.1885     -0.1777
## V79          -1100.8705    673.0944     -1.6355
## V80           3150.6323    845.4863      3.7264
## V81          -3999.0107    953.9845     -4.1919
## V82           4258.3624    973.7335      4.3732
## V83         -14564.5429   1178.7186    -12.3563
## V84          23020.1281   1386.2615     16.6059
## V85         -18515.9322   1470.6428    -12.5904
## V86           6176.9896   1572.5884      3.9279
## V87          -3474.4340   1633.2582     -2.1273
## V88           7095.9956   1473.3893      4.8161
## V89         -10033.8669   1194.0311     -8.4034
## V90          12835.1409   1369.9692      9.3689
## V91          -5268.5839   1493.2385     -3.5283
## V92          -3855.2170   1399.1419     -2.7554
## V93            483.7073   1221.4407      0.3960
## V94           5094.5789   1202.1238      4.2380
## V95          -3363.8981    954.0498     -3.5259
## V96          -1242.5713    896.0701     -1.3867
## V97           2042.5253    882.4901      2.3145
## V98           6111.3313    728.7617      8.3859
## V99          -9401.6209    748.3896    -12.5625
## V100          3043.3156    334.7675      9.0908
## 
## Residual standard error: 0.0296 on 49 degrees of freedom
# For each response, predict on test_predictors with its robust linear model
# and pair the predictions with the observed test_set values in a pred/obs
# data frame (the layout defaultSummary() is given below).

# Water
predict_water_pct <- predict(rlm_water_pct, newdata = test_predictors)
water_pct_df <- data.frame(
  pred = predict_water_pct,
  obs = test_set$water_pct
)

# Fat
predict_fat_pct <- predict(rlm_fat_pct, newdata = test_predictors)
fat_pct_df <- data.frame(
  pred = predict_fat_pct,
  obs = test_set$fat_pct
)

# Protein
predict_protein_pct <- predict(rlm_protein_pct, newdata = test_predictors)
protein_pct_df <- data.frame(
  pred = predict_protein_pct,
  obs = test_set$protein_pct
)

# Accuracy of the robust linear models on the test rows: defaultSummary()
# reports RMSE, R-squared and MAE for each pred/obs data frame
# (water, then fat, then protein).
defaultSummary(water_pct_df)
##      RMSE  Rsquared       MAE 
## 6.4078059 0.7595091 2.9757493
defaultSummary(fat_pct_df)
##      RMSE  Rsquared       MAE 
## 6.0493218 0.8249204 3.1560538
defaultSummary(protein_pct_df)
##     RMSE Rsquared      MAE 
## 2.589344 0.628458 1.352753
## Partial Least Squares

# Fit one partial least squares model per response on its training frame.
# No ncomp is passed, and the summaries report 100 components considered.
pls_water_pct <- plsr(
  water_pct ~ .,
  data = train_water_pct
)
pls_fat_pct <- plsr(
  fat_pct ~ .,
  data = train_fat_pct
)
pls_protein_pct <- plsr(
  protein_pct ~ .,
  data = train_protein_pct
)

# Training-set % variance explained in X and water_pct by component count.
summary(pls_water_pct)
## Data:    X dimension: 150 100 
##  Y dimension: 150 1
## Fit method: kernelpls
## Number of components considered: 100
## TRAINING: % variance explained
##            1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps
## X            98.58    98.97    99.77    99.99   100.00   100.00    100.0
## water_pct    17.55    78.57    84.45    89.68    93.07    93.63     93.9
##            8 comps  9 comps  10 comps  11 comps  12 comps  13 comps  14 comps
## X           100.00   100.00    100.00    100.00    100.00    100.00    100.00
## water_pct    94.27    95.37     95.55     95.61     95.95     96.46     97.26
##            15 comps  16 comps  17 comps  18 comps  19 comps  20 comps  21 comps
## X            100.00    100.00    100.00    100.00    100.00    100.00    100.00
## water_pct     97.51     97.95     98.22     98.41     98.55     98.67     98.73
##            22 comps  23 comps  24 comps  25 comps  26 comps  27 comps  28 comps
## X            100.00    100.00    100.00    100.00    100.00    100.00    100.00
## water_pct     98.83     98.94     99.01     99.07     99.11     99.14     99.19
##            29 comps  30 comps  31 comps  32 comps  33 comps  34 comps  35 comps
## X            100.00    100.00    100.00    100.00    100.00    100.00     100.0
## water_pct     99.23     99.27     99.31     99.33     99.35     99.37      99.4
##            36 comps  37 comps  38 comps  39 comps  40 comps  41 comps  42 comps
## X            100.00    100.00    100.00    100.00    100.00    100.00    100.00
## water_pct     99.42     99.44     99.46     99.49     99.51     99.52     99.54
##            43 comps  44 comps  45 comps  46 comps  47 comps  48 comps  49 comps
## X            100.00    100.00    100.00    100.00    100.00    100.00     100.0
## water_pct     99.55     99.56     99.57     99.58     99.59     99.59      99.6
##            50 comps  51 comps  52 comps  53 comps  54 comps  55 comps  56 comps
## X             100.0    100.00    100.00    100.00    100.00    100.00    100.00
## water_pct      99.6     99.61     99.61     99.62     99.62     99.63     99.63
##            57 comps  58 comps  59 comps  60 comps  61 comps  62 comps  63 comps
## X            100.00    100.00    100.00    100.00    100.00    100.00    100.00
## water_pct     99.63     99.64     99.64     99.64     99.64     99.64     99.65
##            64 comps  65 comps  66 comps  67 comps  68 comps  69 comps  70 comps
## X            100.00    100.00    100.00    100.00    100.00    100.00    100.00
## water_pct     99.65     99.65     99.65     99.65     99.65     99.65     99.65
##            71 comps  72 comps  73 comps  74 comps  75 comps  76 comps  77 comps
## X            100.00    100.00    100.00    100.00    100.00    100.00    100.00
## water_pct     99.65     99.65     99.65     99.65     99.65     99.65     99.65
##            78 comps  79 comps  80 comps  81 comps  82 comps  83 comps  84 comps
## X            100.00    100.00    100.00    100.00    100.00    100.00    100.00
## water_pct     99.65     99.65     99.65     99.65     99.65     99.65     99.65
##            85 comps  86 comps  87 comps  88 comps  89 comps  90 comps  91 comps
## X            100.00    100.00    100.00    100.00    100.00    100.00    100.00
## water_pct     99.65     99.65     99.65     99.65     99.65     99.65     99.65
##            92 comps  93 comps  94 comps  95 comps  96 comps  97 comps  98 comps
## X            100.00    100.00    100.00    100.00    100.00    100.00    100.00
## water_pct     99.65     99.65     99.65     99.65     99.65     99.65     99.65
##            99 comps  100 comps
## X            100.00     100.00
## water_pct     99.65      99.65
# Training-set % variance explained in X and fat_pct by component count.
summary(pls_fat_pct)
## Data:    X dimension: 150 100 
##  Y dimension: 150 1
## Fit method: kernelpls
## Number of components considered: 100
## TRAINING: % variance explained
##          1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps  8 comps
## X          98.58    98.97    99.76    99.99   100.00   100.00   100.00   100.00
## fat_pct    14.56    76.75    83.97    90.20    94.71    95.23    95.42    95.77
##          9 comps  10 comps  11 comps  12 comps  13 comps  14 comps  15 comps
## X         100.00    100.00    100.00    100.00    100.00    100.00    100.00
## fat_pct    95.93     96.26     96.36     97.43     97.66     97.84     97.95
##          16 comps  17 comps  18 comps  19 comps  20 comps  21 comps  22 comps
## X           100.0    100.00    100.00    100.00    100.00    100.00    100.00
## fat_pct      98.2     98.49     98.64     98.75     98.84     98.91     99.01
##          23 comps  24 comps  25 comps  26 comps  27 comps  28 comps  29 comps
## X          100.00    100.00     100.0    100.00    100.00     100.0    100.00
## fat_pct     99.09     99.17      99.2     99.24     99.27      99.3     99.34
##          30 comps  31 comps  32 comps  33 comps  34 comps  35 comps  36 comps
## X          100.00     100.0    100.00    100.00    100.00    100.00     100.0
## fat_pct     99.37      99.4     99.43     99.44     99.46     99.48      99.5
##          37 comps  38 comps  39 comps  40 comps  41 comps  42 comps  43 comps
## X          100.00    100.00    100.00    100.00    100.00    100.00     100.0
## fat_pct     99.52     99.53     99.55     99.56     99.57     99.59      99.6
##          44 comps  45 comps  46 comps  47 comps  48 comps  49 comps  50 comps
## X           100.0    100.00    100.00    100.00    100.00    100.00    100.00
## fat_pct      99.6     99.61     99.62     99.63     99.63     99.63     99.64
##          51 comps  52 comps  53 comps  54 comps  55 comps  56 comps  57 comps
## X          100.00    100.00    100.00    100.00    100.00    100.00    100.00
## fat_pct     99.64     99.64     99.65     99.65     99.65     99.66     99.66
##          58 comps  59 comps  60 comps  61 comps  62 comps  63 comps  64 comps
## X          100.00    100.00    100.00    100.00    100.00    100.00    100.00
## fat_pct     99.66     99.66     99.67     99.67     99.67     99.67     99.67
##          65 comps  66 comps  67 comps  68 comps  69 comps  70 comps  71 comps
## X          100.00    100.00    100.00    100.00    100.00    100.00    100.00
## fat_pct     99.67     99.67     99.67     99.67     99.67     99.67     99.67
##          72 comps  73 comps  74 comps  75 comps  76 comps  77 comps  78 comps
## X          100.00    100.00    100.00    100.00    100.00    100.00    100.00
## fat_pct     99.67     99.68     99.68     99.68     99.68     99.68     99.68
##          79 comps  80 comps  81 comps  82 comps  83 comps  84 comps  85 comps
## X          100.00    100.00    100.00    100.00    100.00    100.00    100.00
## fat_pct     99.68     99.68     99.68     99.68     99.68     99.68     99.68
##          86 comps  87 comps  88 comps  89 comps  90 comps  91 comps  92 comps
## X          100.00    100.00    100.00    100.00    100.00    100.00    100.00
## fat_pct     99.68     99.68     99.68     99.68     99.68     99.68     99.68
##          93 comps  94 comps  95 comps  96 comps  97 comps  98 comps  99 comps
## X          100.00    100.00    100.00    100.00    100.00    100.00    100.00
## fat_pct     99.68     99.68     99.68     99.68     99.68     99.68     99.68
##          100 comps
## X           100.00
## fat_pct      99.68
# Training-set % variance explained in X and protein_pct by component count.
summary(pls_protein_pct)
## Data:    X dimension: 150 100 
##  Y dimension: 150 1
## Fit method: kernelpls
## Number of components considered: 100
## TRAINING: % variance explained
##              1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps
## X             98.557    99.29    99.80    99.99   100.00   100.00   100.00
## protein_pct    4.948    46.20    64.43    70.92    85.84    88.09    89.66
##              8 comps  9 comps  10 comps  11 comps  12 comps  13 comps  14 comps
## X             100.00   100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct    91.56    92.02     93.35     95.24     96.03     96.21     96.55
##              15 comps  16 comps  17 comps  18 comps  19 comps  20 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     97.02     97.27     97.52     97.65     97.87     97.96
##              21 comps  22 comps  23 comps  24 comps  25 comps  26 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     98.04     98.15     98.33     98.45     98.51     98.61
##              27 comps  28 comps  29 comps  30 comps  31 comps  32 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     98.72     98.88     98.96     99.01     99.06     99.12
##              33 comps  34 comps  35 comps  36 comps  37 comps  38 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     99.17     99.22     99.25     99.28     99.32     99.36
##              39 comps  40 comps  41 comps  42 comps  43 comps  44 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     99.38     99.41     99.43     99.44     99.46     99.47
##              45 comps  46 comps  47 comps  48 comps  49 comps  50 comps
## X              100.00     100.0    100.00    100.00    100.00    100.00
## protein_pct     99.48      99.5     99.51     99.52     99.53     99.54
##              51 comps  52 comps  53 comps  54 comps  55 comps  56 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     99.54     99.55     99.56     99.56     99.57     99.58
##              57 comps  58 comps  59 comps  60 comps  61 comps  62 comps
## X              100.00    100.00    100.00     100.0     100.0    100.00
## protein_pct     99.58     99.59     99.59      99.6      99.6     99.61
##              63 comps  64 comps  65 comps  66 comps  67 comps  68 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     99.61     99.62     99.62     99.62     99.62     99.62
##              69 comps  70 comps  71 comps  72 comps  73 comps  74 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     99.62     99.62     99.62     99.62     99.62     99.62
##              75 comps  76 comps  77 comps  78 comps  79 comps  80 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     99.62     99.62     99.62     99.62     99.62     99.62
##              81 comps  82 comps  83 comps  84 comps  85 comps  86 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     99.62     99.62     99.62     99.62     99.62     99.62
##              87 comps  88 comps  89 comps  90 comps  91 comps  92 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     99.62     99.62     99.62     99.62     99.62     99.62
##              93 comps  94 comps  95 comps  96 comps  97 comps  98 comps
## X              100.00    100.00    100.00    100.00    100.00    100.00
## protein_pct     99.62     99.62     99.62     99.62     99.62     99.62
##              99 comps  100 comps
## X              100.00     100.00
## protein_pct     99.62      99.62
# Predict every row of test_predictors with the PLS models, so the PLS
# predictions cover the same test rows as the robust linear model predictions
# instead of only the first five. With ncomp = 1:2, predict() returns the
# predictions for the 1-component and the 2-component model side by side.
# These assignments reuse the names that held the rlm predictions above.
# NOTE(review): ncomp = 1:2 is kept as-is. The training summaries show two
# components explain under 80% of each response, so consider picking ncomp
# by cross-validation before comparing models.
predict_water_pct <- predict(
  pls_water_pct,
  newdata = test_predictors,
  ncomp = 1:2
)
predict_fat_pct <- predict(
  pls_fat_pct,
  newdata = test_predictors,
  ncomp = 1:2
)
predict_protein_pct <- predict(
  pls_protein_pct,
  newdata = test_predictors,
  ncomp = 1:2
)

## Penalized Regression Models

# Fit one enet() model per response, all on the same training predictor
# matrix with the same penalty (lambda = 0.001).
ridge_water_pct <- enet(
  x = as.matrix(train_predictors),
  y = train_set$water_pct,
  lambda = 0.001
)
ridge_fat_pct <- enet(
  x = as.matrix(train_predictors),
  y = train_set$fat_pct,
  lambda = 0.001
)
ridge_protein_pct <- enet(
  x = as.matrix(train_predictors),
  y = train_set$protein_pct,
  lambda = 0.001
)

# Fitted values for the test predictors, requested at s = 1 in "fraction"
# mode for each model.
water_pct_predict <- predict(
  ridge_water_pct,
  newx = as.matrix(test_predictors),
  s = 1,
  mode = "fraction",
  type = "fit"
)
fat_pct_predict <- predict(
  ridge_fat_pct,
  newx = as.matrix(test_predictors),
  s = 1,
  mode = "fraction",
  type = "fit"
)
protein_pct_predict <- predict(
  ridge_protein_pct,
  newx = as.matrix(test_predictors),
  s = 1,
  mode = "fraction",
  type = "fit"
)

Part d

Part e

Problem 6.3

Part a

# Load the ChemicalManufacturingProcess data set into the workspace.
# NOTE(review): presumably shipped with AppliedPredictiveModeling; confirm
# that package is attached earlier in the file.
data("ChemicalManufacturingProcess")

Part b

# Per-column summary statistics; the NA's rows in the output show which
# ManufacturingProcess columns contain missing values.
summary(ChemicalManufacturingProcess)
##      Yield       BiologicalMaterial01 BiologicalMaterial02 BiologicalMaterial03
##  Min.   :35.25   Min.   :4.580        Min.   :46.87        Min.   :56.97       
##  1st Qu.:38.75   1st Qu.:5.978        1st Qu.:52.68        1st Qu.:64.98       
##  Median :39.97   Median :6.305        Median :55.09        Median :67.22       
##  Mean   :40.18   Mean   :6.411        Mean   :55.69        Mean   :67.70       
##  3rd Qu.:41.48   3rd Qu.:6.870        3rd Qu.:58.74        3rd Qu.:70.43       
##  Max.   :46.34   Max.   :8.810        Max.   :64.75        Max.   :78.25       
##                                                                                
##  BiologicalMaterial04 BiologicalMaterial05 BiologicalMaterial06
##  Min.   : 9.38        Min.   :13.24        Min.   :40.60       
##  1st Qu.:11.24        1st Qu.:17.23        1st Qu.:46.05       
##  Median :12.10        Median :18.49        Median :48.46       
##  Mean   :12.35        Mean   :18.60        Mean   :48.91       
##  3rd Qu.:13.22        3rd Qu.:19.90        3rd Qu.:51.34       
##  Max.   :23.09        Max.   :24.85        Max.   :59.38       
##                                                                
##  BiologicalMaterial07 BiologicalMaterial08 BiologicalMaterial09
##  Min.   :100.0        Min.   :15.88        Min.   :11.44       
##  1st Qu.:100.0        1st Qu.:17.06        1st Qu.:12.60       
##  Median :100.0        Median :17.51        Median :12.84       
##  Mean   :100.0        Mean   :17.49        Mean   :12.85       
##  3rd Qu.:100.0        3rd Qu.:17.88        3rd Qu.:13.13       
##  Max.   :100.8        Max.   :19.14        Max.   :14.08       
##                                                                
##  BiologicalMaterial10 BiologicalMaterial11 BiologicalMaterial12
##  Min.   :1.770        Min.   :135.8        Min.   :18.35       
##  1st Qu.:2.460        1st Qu.:143.8        1st Qu.:19.73       
##  Median :2.710        Median :146.1        Median :20.12       
##  Mean   :2.801        Mean   :147.0        Mean   :20.20       
##  3rd Qu.:2.990        3rd Qu.:149.6        3rd Qu.:20.75       
##  Max.   :6.870        Max.   :158.7        Max.   :22.21       
##                                                                
##  ManufacturingProcess01 ManufacturingProcess02 ManufacturingProcess03
##  Min.   : 0.00          Min.   : 0.00          Min.   :1.47          
##  1st Qu.:10.80          1st Qu.:19.30          1st Qu.:1.53          
##  Median :11.40          Median :21.00          Median :1.54          
##  Mean   :11.21          Mean   :16.68          Mean   :1.54          
##  3rd Qu.:12.15          3rd Qu.:21.50          3rd Qu.:1.55          
##  Max.   :14.10          Max.   :22.50          Max.   :1.60          
##  NA's   :1              NA's   :3              NA's   :15            
##  ManufacturingProcess04 ManufacturingProcess05 ManufacturingProcess06
##  Min.   :911.0          Min.   : 923.0         Min.   :203.0         
##  1st Qu.:928.0          1st Qu.: 986.8         1st Qu.:205.7         
##  Median :934.0          Median : 999.2         Median :206.8         
##  Mean   :931.9          Mean   :1001.7         Mean   :207.4         
##  3rd Qu.:936.0          3rd Qu.:1008.9         3rd Qu.:208.7         
##  Max.   :946.0          Max.   :1175.3         Max.   :227.4         
##  NA's   :1              NA's   :1              NA's   :2             
##  ManufacturingProcess07 ManufacturingProcess08 ManufacturingProcess09
##  Min.   :177.0          Min.   :177.0          Min.   :38.89         
##  1st Qu.:177.0          1st Qu.:177.0          1st Qu.:44.89         
##  Median :177.0          Median :178.0          Median :45.73         
##  Mean   :177.5          Mean   :177.6          Mean   :45.66         
##  3rd Qu.:178.0          3rd Qu.:178.0          3rd Qu.:46.52         
##  Max.   :178.0          Max.   :178.0          Max.   :49.36         
##  NA's   :1              NA's   :1                                    
##  ManufacturingProcess10 ManufacturingProcess11 ManufacturingProcess12
##  Min.   : 7.500         Min.   : 7.500         Min.   :   0.0        
##  1st Qu.: 8.700         1st Qu.: 9.000         1st Qu.:   0.0        
##  Median : 9.100         Median : 9.400         Median :   0.0        
##  Mean   : 9.179         Mean   : 9.386         Mean   : 857.8        
##  3rd Qu.: 9.550         3rd Qu.: 9.900         3rd Qu.:   0.0        
##  Max.   :11.600         Max.   :11.500         Max.   :4549.0        
##  NA's   :9              NA's   :10             NA's   :1             
##  ManufacturingProcess13 ManufacturingProcess14 ManufacturingProcess15
##  Min.   :32.10          Min.   :4701           Min.   :5904          
##  1st Qu.:33.90          1st Qu.:4828           1st Qu.:6010          
##  Median :34.60          Median :4856           Median :6032          
##  Mean   :34.51          Mean   :4854           Mean   :6039          
##  3rd Qu.:35.20          3rd Qu.:4882           3rd Qu.:6061          
##  Max.   :38.60          Max.   :5055           Max.   :6233          
##                         NA's   :1                                    
##  ManufacturingProcess16 ManufacturingProcess17 ManufacturingProcess18
##  Min.   :   0           Min.   :31.30          Min.   :   0          
##  1st Qu.:4561           1st Qu.:33.50          1st Qu.:4813          
##  Median :4588           Median :34.40          Median :4835          
##  Mean   :4566           Mean   :34.34          Mean   :4810          
##  3rd Qu.:4619           3rd Qu.:35.10          3rd Qu.:4862          
##  Max.   :4852           Max.   :40.00          Max.   :4971          
##                                                                      
##  ManufacturingProcess19 ManufacturingProcess20 ManufacturingProcess21
##  Min.   :5890           Min.   :   0           Min.   :-1.8000       
##  1st Qu.:6001           1st Qu.:4553           1st Qu.:-0.6000       
##  Median :6022           Median :4582           Median :-0.3000       
##  Mean   :6028           Mean   :4556           Mean   :-0.1642       
##  3rd Qu.:6050           3rd Qu.:4610           3rd Qu.: 0.0000       
##  Max.   :6146           Max.   :4759           Max.   : 3.6000       
##                                                                      
##  ManufacturingProcess22 ManufacturingProcess23 ManufacturingProcess24
##  Min.   : 0.000         Min.   :0.000          Min.   : 0.000        
##  1st Qu.: 3.000         1st Qu.:2.000          1st Qu.: 4.000        
##  Median : 5.000         Median :3.000          Median : 8.000        
##  Mean   : 5.406         Mean   :3.017          Mean   : 8.834        
##  3rd Qu.: 8.000         3rd Qu.:4.000          3rd Qu.:14.000        
##  Max.   :12.000         Max.   :6.000          Max.   :23.000        
##  NA's   :1              NA's   :1              NA's   :1             
##  ManufacturingProcess25 ManufacturingProcess26 ManufacturingProcess27
##  Min.   :   0           Min.   :   0           Min.   :   0          
##  1st Qu.:4832           1st Qu.:6020           1st Qu.:4560          
##  Median :4855           Median :6047           Median :4587          
##  Mean   :4828           Mean   :6016           Mean   :4563          
##  3rd Qu.:4877           3rd Qu.:6070           3rd Qu.:4609          
##  Max.   :4990           Max.   :6161           Max.   :4710          
##  NA's   :5              NA's   :5              NA's   :5             
##  ManufacturingProcess28 ManufacturingProcess29 ManufacturingProcess30
##  Min.   : 0.000         Min.   : 0.00          Min.   : 0.000        
##  1st Qu.: 0.000         1st Qu.:19.70          1st Qu.: 8.800        
##  Median :10.400         Median :19.90          Median : 9.100        
##  Mean   : 6.592         Mean   :20.01          Mean   : 9.161        
##  3rd Qu.:10.750         3rd Qu.:20.40          3rd Qu.: 9.700        
##  Max.   :11.500         Max.   :22.00          Max.   :11.200        
##  NA's   :5              NA's   :5              NA's   :5             
##  ManufacturingProcess31 ManufacturingProcess32 ManufacturingProcess33
##  Min.   : 0.00          Min.   :143.0          Min.   :56.00         
##  1st Qu.:70.10          1st Qu.:155.0          1st Qu.:62.00         
##  Median :70.80          Median :158.0          Median :64.00         
##  Mean   :70.18          Mean   :158.5          Mean   :63.54         
##  3rd Qu.:71.40          3rd Qu.:162.0          3rd Qu.:65.00         
##  Max.   :72.50          Max.   :173.0          Max.   :70.00         
##  NA's   :5                                     NA's   :5             
##  ManufacturingProcess34 ManufacturingProcess35 ManufacturingProcess36
##  Min.   :2.300          Min.   :463.0          Min.   :0.01700       
##  1st Qu.:2.500          1st Qu.:490.0          1st Qu.:0.01900       
##  Median :2.500          Median :495.0          Median :0.02000       
##  Mean   :2.494          Mean   :495.6          Mean   :0.01957       
##  3rd Qu.:2.500          3rd Qu.:501.5          3rd Qu.:0.02000       
##  Max.   :2.600          Max.   :522.0          Max.   :0.02200       
##  NA's   :5              NA's   :5              NA's   :5             
##  ManufacturingProcess37 ManufacturingProcess38 ManufacturingProcess39
##  Min.   :0.000          Min.   :0.000          Min.   :0.000         
##  1st Qu.:0.700          1st Qu.:2.000          1st Qu.:7.100         
##  Median :1.000          Median :3.000          Median :7.200         
##  Mean   :1.014          Mean   :2.534          Mean   :6.851         
##  3rd Qu.:1.300          3rd Qu.:3.000          3rd Qu.:7.300         
##  Max.   :2.300          Max.   :3.000          Max.   :7.500         
##                                                                      
##  ManufacturingProcess40 ManufacturingProcess41 ManufacturingProcess42
##  Min.   :0.00000        Min.   :0.00000        Min.   : 0.00         
##  1st Qu.:0.00000        1st Qu.:0.00000        1st Qu.:11.40         
##  Median :0.00000        Median :0.00000        Median :11.60         
##  Mean   :0.01771        Mean   :0.02371        Mean   :11.21         
##  3rd Qu.:0.00000        3rd Qu.:0.00000        3rd Qu.:11.70         
##  Max.   :0.10000        Max.   :0.20000        Max.   :12.10         
##  NA's   :1              NA's   :1                                    
##  ManufacturingProcess43 ManufacturingProcess44 ManufacturingProcess45
##  Min.   : 0.0000        Min.   :0.000          Min.   :0.000         
##  1st Qu.: 0.6000        1st Qu.:1.800          1st Qu.:2.100         
##  Median : 0.8000        Median :1.900          Median :2.200         
##  Mean   : 0.9119        Mean   :1.805          Mean   :2.138         
##  3rd Qu.: 1.0250        3rd Qu.:1.900          3rd Qu.:2.300         
##  Max.   :11.0000        Max.   :2.100          Max.   :2.600         
## 
# Compact column-wise preview: row and column counts, then each column's
# type and leading values.
glimpse(ChemicalManufacturingProcess)
## Rows: 176
## Columns: 58
## $ Yield                  <dbl> 38.00, 42.44, 42.03, 41.42, 42.49, 43.57, 43.12…
## $ BiologicalMaterial01   <dbl> 6.25, 8.01, 8.01, 8.01, 7.47, 6.12, 7.48, 6.94,…
## $ BiologicalMaterial02   <dbl> 49.58, 60.97, 60.97, 60.97, 63.33, 58.36, 64.47…
## $ BiologicalMaterial03   <dbl> 56.97, 67.48, 67.48, 67.48, 72.25, 65.31, 72.41…
## $ BiologicalMaterial04   <dbl> 12.74, 14.65, 14.65, 14.65, 14.02, 15.17, 13.82…
## $ BiologicalMaterial05   <dbl> 19.51, 19.36, 19.36, 19.36, 17.91, 21.79, 17.71…
## $ BiologicalMaterial06   <dbl> 43.73, 53.14, 53.14, 53.14, 54.66, 51.23, 54.45…
## $ BiologicalMaterial07   <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 10…
## $ BiologicalMaterial08   <dbl> 16.66, 19.04, 19.04, 19.04, 18.22, 18.30, 18.72…
## $ BiologicalMaterial09   <dbl> 11.44, 12.55, 12.55, 12.55, 12.80, 12.13, 12.95…
## $ BiologicalMaterial10   <dbl> 3.46, 3.46, 3.46, 3.46, 3.05, 3.78, 3.04, 3.85,…
## $ BiologicalMaterial11   <dbl> 138.09, 153.67, 153.67, 153.67, 147.61, 151.88,…
## $ BiologicalMaterial12   <dbl> 18.83, 21.05, 21.05, 21.05, 21.05, 20.76, 20.75…
## $ ManufacturingProcess01 <dbl> NA, 0.0, 0.0, 0.0, 10.7, 12.0, 11.5, 12.0, 12.0…
## $ ManufacturingProcess02 <dbl> NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ManufacturingProcess03 <dbl> NA, NA, NA, NA, NA, NA, 1.56, 1.55, 1.56, 1.55,…
## $ ManufacturingProcess04 <dbl> NA, 917, 912, 911, 918, 924, 933, 929, 928, 938…
## $ ManufacturingProcess05 <dbl> NA, 1032.2, 1003.6, 1014.6, 1027.5, 1016.8, 988…
## $ ManufacturingProcess06 <dbl> NA, 210.0, 207.1, 213.3, 205.7, 208.9, 210.0, 2…
## $ ManufacturingProcess07 <dbl> NA, 177, 178, 177, 178, 178, 177, 178, 177, 177…
## $ ManufacturingProcess08 <dbl> NA, 178, 178, 177, 178, 178, 178, 178, 177, 177…
## $ ManufacturingProcess09 <dbl> 43.00, 46.57, 45.07, 44.92, 44.96, 45.32, 49.36…
## $ ManufacturingProcess10 <dbl> NA, NA, NA, NA, NA, NA, 11.6, 10.2, 9.7, 10.1, …
## $ ManufacturingProcess11 <dbl> NA, NA, NA, NA, NA, NA, 11.5, 11.3, 11.1, 10.2,…
## $ ManufacturingProcess12 <dbl> NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ManufacturingProcess13 <dbl> 35.5, 34.0, 34.8, 34.8, 34.6, 34.0, 32.4, 33.6,…
## $ ManufacturingProcess14 <dbl> 4898, 4869, 4878, 4897, 4992, 4985, 4745, 4854,…
## $ ManufacturingProcess15 <dbl> 6108, 6095, 6087, 6102, 6233, 6222, 5999, 6105,…
## $ ManufacturingProcess16 <dbl> 4682, 4617, 4617, 4635, 4733, 4786, 4486, 4626,…
## $ ManufacturingProcess17 <dbl> 35.5, 34.0, 34.8, 34.8, 33.9, 33.4, 33.8, 33.6,…
## $ ManufacturingProcess18 <dbl> 4865, 4867, 4877, 4872, 4886, 4862, 4758, 4766,…
## $ ManufacturingProcess19 <dbl> 6049, 6097, 6078, 6073, 6102, 6115, 6013, 6022,…
## $ ManufacturingProcess20 <dbl> 4665, 4621, 4621, 4611, 4659, 4696, 4522, 4552,…
## $ ManufacturingProcess21 <dbl> 0.0, 0.0, 0.0, 0.0, -0.7, -0.6, 1.4, 0.0, 0.0, …
## $ ManufacturingProcess22 <dbl> NA, 3, 4, 5, 8, 9, 1, 2, 3, 4, 6, 7, 8, 10, 11,…
## $ ManufacturingProcess23 <dbl> NA, 0, 1, 2, 4, 1, 1, 2, 3, 1, 3, 4, 1, 2, 3, 4…
## $ ManufacturingProcess24 <dbl> NA, 3, 4, 5, 18, 1, 1, 2, 3, 4, 6, 7, 8, 2, 15,…
## $ ManufacturingProcess25 <dbl> 4873, 4869, 4897, 4892, 4930, 4871, 4795, 4806,…
## $ ManufacturingProcess26 <dbl> 6074, 6107, 6116, 6111, 6151, 6128, 6057, 6059,…
## $ ManufacturingProcess27 <dbl> 4685, 4630, 4637, 4630, 4684, 4687, 4572, 4586,…
## $ ManufacturingProcess28 <dbl> 10.7, 11.2, 11.1, 11.1, 11.3, 11.4, 11.2, 11.1,…
## $ ManufacturingProcess29 <dbl> 21.0, 21.4, 21.3, 21.3, 21.6, 21.7, 21.2, 21.2,…
## $ ManufacturingProcess30 <dbl> 9.9, 9.9, 9.4, 9.4, 9.0, 10.1, 11.2, 10.9, 10.5…
## $ ManufacturingProcess31 <dbl> 69.1, 68.7, 69.3, 69.3, 69.4, 68.2, 67.6, 67.9,…
## $ ManufacturingProcess32 <dbl> 156, 169, 173, 171, 171, 173, 159, 161, 160, 16…
## $ ManufacturingProcess33 <dbl> 66, 66, 66, 68, 70, 70, 65, 65, 65, 66, 67, 67,…
## $ ManufacturingProcess34 <dbl> 2.4, 2.6, 2.6, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.…
## $ ManufacturingProcess35 <dbl> 486, 508, 509, 496, 468, 490, 475, 478, 491, 48…
## $ ManufacturingProcess36 <dbl> 0.019, 0.019, 0.018, 0.018, 0.017, 0.018, 0.019…
## $ ManufacturingProcess37 <dbl> 0.5, 2.0, 0.7, 1.2, 0.2, 0.4, 0.8, 1.0, 1.2, 1.…
## $ ManufacturingProcess38 <dbl> 3, 2, 2, 2, 2, 2, 2, 2, 3, 3, 2, 3, 3, 3, 3, 3,…
## $ ManufacturingProcess39 <dbl> 7.2, 7.2, 7.2, 7.2, 7.3, 7.2, 7.3, 7.3, 7.4, 7.…
## $ ManufacturingProcess40 <dbl> NA, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0…
## $ ManufacturingProcess41 <dbl> NA, 0.15, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0…
## $ ManufacturingProcess42 <dbl> 11.6, 11.1, 12.0, 10.6, 11.0, 11.5, 11.7, 11.4,…
## $ ManufacturingProcess43 <dbl> 3.0, 0.9, 1.0, 1.1, 1.1, 2.2, 0.7, 0.8, 0.9, 0.…
## $ ManufacturingProcess44 <dbl> 1.8, 1.9, 1.8, 1.8, 1.7, 1.8, 2.0, 2.0, 1.9, 1.…
## $ ManufacturingProcess45 <dbl> 2.4, 2.2, 2.3, 2.1, 2.1, 2.0, 2.2, 2.2, 2.1, 2.…
# Median imputation: for every column, replace its missing entries with the
# median of that column's observed values. Each column name is echoed as it
# is processed.
columns <- colnames(ChemicalManufacturingProcess)

for (col in columns) {
  print(col)

  # Median over the non-missing entries only.
  median_value <- median(ChemicalManufacturingProcess[[col]], na.rm = TRUE)

  # Leave the column untouched when it has nothing to fill in.
  missing_rows <- is.na(ChemicalManufacturingProcess[[col]])
  if (any(missing_rows)) {
    ChemicalManufacturingProcess[[col]][missing_rows] <- median_value
  }
}
## [1] "Yield"
## [1] "BiologicalMaterial01"
## [1] "BiologicalMaterial02"
## [1] "BiologicalMaterial03"
## [1] "BiologicalMaterial04"
## [1] "BiologicalMaterial05"
## [1] "BiologicalMaterial06"
## [1] "BiologicalMaterial07"
## [1] "BiologicalMaterial08"
## [1] "BiologicalMaterial09"
## [1] "BiologicalMaterial10"
## [1] "BiologicalMaterial11"
## [1] "BiologicalMaterial12"
## [1] "ManufacturingProcess01"
## [1] "ManufacturingProcess02"
## [1] "ManufacturingProcess03"
## [1] "ManufacturingProcess04"
## [1] "ManufacturingProcess05"
## [1] "ManufacturingProcess06"
## [1] "ManufacturingProcess07"
## [1] "ManufacturingProcess08"
## [1] "ManufacturingProcess09"
## [1] "ManufacturingProcess10"
## [1] "ManufacturingProcess11"
## [1] "ManufacturingProcess12"
## [1] "ManufacturingProcess13"
## [1] "ManufacturingProcess14"
## [1] "ManufacturingProcess15"
## [1] "ManufacturingProcess16"
## [1] "ManufacturingProcess17"
## [1] "ManufacturingProcess18"
## [1] "ManufacturingProcess19"
## [1] "ManufacturingProcess20"
## [1] "ManufacturingProcess21"
## [1] "ManufacturingProcess22"
## [1] "ManufacturingProcess23"
## [1] "ManufacturingProcess24"
## [1] "ManufacturingProcess25"
## [1] "ManufacturingProcess26"
## [1] "ManufacturingProcess27"
## [1] "ManufacturingProcess28"
## [1] "ManufacturingProcess29"
## [1] "ManufacturingProcess30"
## [1] "ManufacturingProcess31"
## [1] "ManufacturingProcess32"
## [1] "ManufacturingProcess33"
## [1] "ManufacturingProcess34"
## [1] "ManufacturingProcess35"
## [1] "ManufacturingProcess36"
## [1] "ManufacturingProcess37"
## [1] "ManufacturingProcess38"
## [1] "ManufacturingProcess39"
## [1] "ManufacturingProcess40"
## [1] "ManufacturingProcess41"
## [1] "ManufacturingProcess42"
## [1] "ManufacturingProcess43"
## [1] "ManufacturingProcess44"
## [1] "ManufacturingProcess45"
# Per-column summary statistics after imputation; no NA counts should remain.
ChemicalManufacturingProcess %>% summary()
##      Yield       BiologicalMaterial01 BiologicalMaterial02 BiologicalMaterial03
##  Min.   :35.25   Min.   :4.580        Min.   :46.87        Min.   :56.97       
##  1st Qu.:38.75   1st Qu.:5.978        1st Qu.:52.68        1st Qu.:64.98       
##  Median :39.97   Median :6.305        Median :55.09        Median :67.22       
##  Mean   :40.18   Mean   :6.411        Mean   :55.69        Mean   :67.70       
##  3rd Qu.:41.48   3rd Qu.:6.870        3rd Qu.:58.74        3rd Qu.:70.43       
##  Max.   :46.34   Max.   :8.810        Max.   :64.75        Max.   :78.25       
##  BiologicalMaterial04 BiologicalMaterial05 BiologicalMaterial06
##  Min.   : 9.38        Min.   :13.24        Min.   :40.60       
##  1st Qu.:11.24        1st Qu.:17.23        1st Qu.:46.05       
##  Median :12.10        Median :18.49        Median :48.46       
##  Mean   :12.35        Mean   :18.60        Mean   :48.91       
##  3rd Qu.:13.22        3rd Qu.:19.90        3rd Qu.:51.34       
##  Max.   :23.09        Max.   :24.85        Max.   :59.38       
##  BiologicalMaterial07 BiologicalMaterial08 BiologicalMaterial09
##  Min.   :100.0        Min.   :15.88        Min.   :11.44       
##  1st Qu.:100.0        1st Qu.:17.06        1st Qu.:12.60       
##  Median :100.0        Median :17.51        Median :12.84       
##  Mean   :100.0        Mean   :17.49        Mean   :12.85       
##  3rd Qu.:100.0        3rd Qu.:17.88        3rd Qu.:13.13       
##  Max.   :100.8        Max.   :19.14        Max.   :14.08       
##  BiologicalMaterial10 BiologicalMaterial11 BiologicalMaterial12
##  Min.   :1.770        Min.   :135.8        Min.   :18.35       
##  1st Qu.:2.460        1st Qu.:143.8        1st Qu.:19.73       
##  Median :2.710        Median :146.1        Median :20.12       
##  Mean   :2.801        Mean   :147.0        Mean   :20.20       
##  3rd Qu.:2.990        3rd Qu.:149.6        3rd Qu.:20.75       
##  Max.   :6.870        Max.   :158.7        Max.   :22.21       
##  ManufacturingProcess01 ManufacturingProcess02 ManufacturingProcess03
##  Min.   : 0.00          Min.   : 0.00          Min.   :1.47          
##  1st Qu.:10.80          1st Qu.:19.30          1st Qu.:1.53          
##  Median :11.40          Median :21.00          Median :1.54          
##  Mean   :11.21          Mean   :16.76          Mean   :1.54          
##  3rd Qu.:12.12          3rd Qu.:21.50          3rd Qu.:1.55          
##  Max.   :14.10          Max.   :22.50          Max.   :1.60          
##  ManufacturingProcess04 ManufacturingProcess05 ManufacturingProcess06
##  Min.   :911.0          Min.   : 923.0         Min.   :203.0         
##  1st Qu.:928.0          1st Qu.: 986.8         1st Qu.:205.7         
##  Median :934.0          Median : 999.2         Median :206.8         
##  Mean   :931.9          Mean   :1001.7         Mean   :207.4         
##  3rd Qu.:936.0          3rd Qu.:1008.7         3rd Qu.:208.7         
##  Max.   :946.0          Max.   :1175.3         Max.   :227.4         
##  ManufacturingProcess07 ManufacturingProcess08 ManufacturingProcess09
##  Min.   :177.0          Min.   :177.0          Min.   :38.89         
##  1st Qu.:177.0          1st Qu.:177.0          1st Qu.:44.89         
##  Median :177.0          Median :178.0          Median :45.73         
##  Mean   :177.5          Mean   :177.6          Mean   :45.66         
##  3rd Qu.:178.0          3rd Qu.:178.0          3rd Qu.:46.52         
##  Max.   :178.0          Max.   :178.0          Max.   :49.36         
##  ManufacturingProcess10 ManufacturingProcess11 ManufacturingProcess12
##  Min.   : 7.500         Min.   : 7.500         Min.   :   0.0        
##  1st Qu.: 8.700         1st Qu.: 9.000         1st Qu.:   0.0        
##  Median : 9.100         Median : 9.400         Median :   0.0        
##  Mean   : 9.175         Mean   : 9.386         Mean   : 852.9        
##  3rd Qu.: 9.500         3rd Qu.: 9.825         3rd Qu.:   0.0        
##  Max.   :11.600         Max.   :11.500         Max.   :4549.0        
##  ManufacturingProcess13 ManufacturingProcess14 ManufacturingProcess15
##  Min.   :32.10          Min.   :4701           Min.   :5904          
##  1st Qu.:33.90          1st Qu.:4828           1st Qu.:6010          
##  Median :34.60          Median :4856           Median :6032          
##  Mean   :34.51          Mean   :4854           Mean   :6039          
##  3rd Qu.:35.20          3rd Qu.:4882           3rd Qu.:6061          
##  Max.   :38.60          Max.   :5055           Max.   :6233          
##  ManufacturingProcess16 ManufacturingProcess17 ManufacturingProcess18
##  Min.   :   0           Min.   :31.30          Min.   :   0          
##  1st Qu.:4561           1st Qu.:33.50          1st Qu.:4813          
##  Median :4588           Median :34.40          Median :4835          
##  Mean   :4566           Mean   :34.34          Mean   :4810          
##  3rd Qu.:4619           3rd Qu.:35.10          3rd Qu.:4862          
##  Max.   :4852           Max.   :40.00          Max.   :4971          
##  ManufacturingProcess19 ManufacturingProcess20 ManufacturingProcess21
##  Min.   :5890           Min.   :   0           Min.   :-1.8000       
##  1st Qu.:6001           1st Qu.:4553           1st Qu.:-0.6000       
##  Median :6022           Median :4582           Median :-0.3000       
##  Mean   :6028           Mean   :4556           Mean   :-0.1642       
##  3rd Qu.:6050           3rd Qu.:4610           3rd Qu.: 0.0000       
##  Max.   :6146           Max.   :4759           Max.   : 3.6000       
##  ManufacturingProcess22 ManufacturingProcess23 ManufacturingProcess24
##  Min.   : 0.000         Min.   :0.000          Min.   : 0.00         
##  1st Qu.: 3.000         1st Qu.:2.000          1st Qu.: 4.00         
##  Median : 5.000         Median :3.000          Median : 8.00         
##  Mean   : 5.403         Mean   :3.017          Mean   : 8.83         
##  3rd Qu.: 8.000         3rd Qu.:4.000          3rd Qu.:14.00         
##  Max.   :12.000         Max.   :6.000          Max.   :23.00         
##  ManufacturingProcess25 ManufacturingProcess26 ManufacturingProcess27
##  Min.   :   0           Min.   :   0           Min.   :   0          
##  1st Qu.:4834           1st Qu.:6021           1st Qu.:4563          
##  Median :4855           Median :6047           Median :4587          
##  Mean   :4829           Mean   :6016           Mean   :4563          
##  3rd Qu.:4876           3rd Qu.:6069           3rd Qu.:4609          
##  Max.   :4990           Max.   :6161           Max.   :4710          
##  ManufacturingProcess28 ManufacturingProcess29 ManufacturingProcess30
##  Min.   : 0.0           Min.   : 0.00          Min.   : 0.00         
##  1st Qu.: 0.0           1st Qu.:19.70          1st Qu.: 8.80         
##  Median :10.4           Median :19.90          Median : 9.10         
##  Mean   : 6.7           Mean   :20.01          Mean   : 9.16         
##  3rd Qu.:10.7           3rd Qu.:20.40          3rd Qu.: 9.70         
##  Max.   :11.5           Max.   :22.00          Max.   :11.20         
##  ManufacturingProcess31 ManufacturingProcess32 ManufacturingProcess33
##  Min.   : 0.0           Min.   :143.0          Min.   :56.00         
##  1st Qu.:70.1           1st Qu.:155.0          1st Qu.:62.00         
##  Median :70.8           Median :158.0          Median :64.00         
##  Mean   :70.2           Mean   :158.5          Mean   :63.56         
##  3rd Qu.:71.4           3rd Qu.:162.0          3rd Qu.:65.00         
##  Max.   :72.5           Max.   :173.0          Max.   :70.00         
##  ManufacturingProcess34 ManufacturingProcess35 ManufacturingProcess36
##  Min.   :2.300          Min.   :463.0          Min.   :0.01700       
##  1st Qu.:2.500          1st Qu.:490.0          1st Qu.:0.01900       
##  Median :2.500          Median :495.0          Median :0.02000       
##  Mean   :2.494          Mean   :495.6          Mean   :0.01959       
##  3rd Qu.:2.500          3rd Qu.:501.0          3rd Qu.:0.02000       
##  Max.   :2.600          Max.   :522.0          Max.   :0.02200       
##  ManufacturingProcess37 ManufacturingProcess38 ManufacturingProcess39
##  Min.   :0.000          Min.   :0.000          Min.   :0.000         
##  1st Qu.:0.700          1st Qu.:2.000          1st Qu.:7.100         
##  Median :1.000          Median :3.000          Median :7.200         
##  Mean   :1.014          Mean   :2.534          Mean   :6.851         
##  3rd Qu.:1.300          3rd Qu.:3.000          3rd Qu.:7.300         
##  Max.   :2.300          Max.   :3.000          Max.   :7.500         
##  ManufacturingProcess40 ManufacturingProcess41 ManufacturingProcess42
##  Min.   :0.00000        Min.   :0.00000        Min.   : 0.00         
##  1st Qu.:0.00000        1st Qu.:0.00000        1st Qu.:11.40         
##  Median :0.00000        Median :0.00000        Median :11.60         
##  Mean   :0.01761        Mean   :0.02358        Mean   :11.21         
##  3rd Qu.:0.00000        3rd Qu.:0.00000        3rd Qu.:11.70         
##  Max.   :0.10000        Max.   :0.20000        Max.   :12.10         
##  ManufacturingProcess43 ManufacturingProcess44 ManufacturingProcess45
##  Min.   : 0.0000        Min.   :0.000          Min.   :0.000         
##  1st Qu.: 0.6000        1st Qu.:1.800          1st Qu.:2.100         
##  Median : 0.8000        Median :1.900          Median :2.200         
##  Mean   : 0.9119        Mean   :1.805          Mean   :2.138         
##  3rd Qu.: 1.0250        3rd Qu.:1.900          3rd Qu.:2.300         
##  Max.   :11.0000        Max.   :2.100          Max.   :2.600

Part c

# Reproducible 75/25 train/test partition, followed by an ordinary
# least-squares fit of Yield on every other column.
set.seed(1234)

n_rows <- nrow(ChemicalManufacturingProcess)
train_size <- round(n_rows * .75)
sample_set <- sample(n_rows, size = train_size, replace = FALSE)

# Rows drawn above form the training set; the remaining rows are held out.
train_set <- ChemicalManufacturingProcess[sample_set, ]
test_set <- ChemicalManufacturingProcess[-sample_set, ]

# The call is echoed verbatim in the "Call:" section of summary().
lm_mod <- lm(Yield ~ ., data = train_set)

summary(lm_mod)
## 
## Call:
## lm(formula = Yield ~ ., data = train_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.29591 -0.47313 -0.03832  0.52560  1.95060 
## 
## Coefficients: (1 not defined because of singularities)
##                          Estimate Std. Error t value Pr(>|t|)   
## (Intercept)             1.045e+02  1.649e+02   0.634  0.52805   
## BiologicalMaterial01   -3.738e-01  5.217e-01  -0.716  0.47596   
## BiologicalMaterial02   -1.886e-01  1.564e-01  -1.206  0.23146   
## BiologicalMaterial03    5.307e-01  2.977e-01   1.783  0.07869 . 
## BiologicalMaterial04   -6.683e-01  7.489e-01  -0.892  0.37499   
## BiologicalMaterial05    1.619e-01  1.450e-01   1.117  0.26774   
## BiologicalMaterial06   -1.864e-01  3.687e-01  -0.506  0.61462   
## BiologicalMaterial07   -1.466e+00  1.224e+00  -1.198  0.23483   
## BiologicalMaterial08    1.170e+00  8.969e-01   1.305  0.19594   
## BiologicalMaterial09   -2.906e+00  1.829e+00  -1.589  0.11628   
## BiologicalMaterial10    1.397e+00  1.929e+00   0.724  0.47112   
## BiologicalMaterial11    1.433e-02  1.221e-01   0.117  0.90689   
## BiologicalMaterial12    3.197e-01  8.077e-01   0.396  0.69333   
## ManufacturingProcess01  6.107e-02  1.232e-01   0.496  0.62156   
## ManufacturingProcess02  5.395e-02  6.278e-02   0.859  0.39285   
## ManufacturingProcess03 -7.360e+00  6.296e+00  -1.169  0.24609   
## ManufacturingProcess04  6.652e-02  3.927e-02   1.694  0.09440 . 
## ManufacturingProcess05 -1.504e-03  4.747e-03  -0.317  0.75231   
## ManufacturingProcess06  1.229e-01  7.911e-02   1.554  0.12439   
## ManufacturingProcess07 -1.529e-02  2.652e-01  -0.058  0.95419   
## ManufacturingProcess08 -2.349e-01  3.126e-01  -0.751  0.45483   
## ManufacturingProcess09  2.552e-01  2.333e-01   1.094  0.27759   
## ManufacturingProcess10 -4.665e-01  6.717e-01  -0.694  0.48952   
## ManufacturingProcess11  4.531e-01  8.798e-01   0.515  0.60805   
## ManufacturingProcess12  5.780e-06  1.450e-04   0.040  0.96831   
## ManufacturingProcess13 -5.438e-01  5.968e-01  -0.911  0.36514   
## ManufacturingProcess14  1.485e-02  1.368e-02   1.085  0.28119   
## ManufacturingProcess15 -1.329e-02  1.216e-02  -1.092  0.27817   
## ManufacturingProcess16  4.847e-04  5.263e-04   0.921  0.36001   
## ManufacturingProcess17  4.245e-01  5.078e-01   0.836  0.40583   
## ManufacturingProcess18  5.217e-03  5.462e-03   0.955  0.34255   
## ManufacturingProcess19  3.528e-03  1.078e-02   0.327  0.74448   
## ManufacturingProcess20 -5.254e-03  5.742e-03  -0.915  0.36306   
## ManufacturingProcess21         NA         NA      NA       NA   
## ManufacturingProcess22 -4.289e-03  5.601e-02  -0.077  0.93917   
## ManufacturingProcess23 -1.722e-02  1.108e-01  -0.155  0.87692   
## ManufacturingProcess24  1.018e-03  3.179e-02   0.032  0.97455   
## ManufacturingProcess25 -5.984e-03  1.861e-02  -0.322  0.74869   
## ManufacturingProcess26  7.468e-03  1.436e-02   0.520  0.60463   
## ManufacturingProcess27 -1.207e-02  1.055e-02  -1.144  0.25629   
## ManufacturingProcess28 -5.524e-02  4.448e-02  -1.242  0.21812   
## ManufacturingProcess29  1.479e+00  1.344e+00   1.100  0.27465   
## ManufacturingProcess30  6.995e-01  9.285e-01   0.753  0.45359   
## ManufacturingProcess31  6.816e-02  1.421e-01   0.480  0.63295   
## ManufacturingProcess32  2.514e-01  7.756e-02   3.241  0.00178 **
## ManufacturingProcess33 -3.230e-01  1.627e-01  -1.985  0.05082 . 
## ManufacturingProcess34 -4.644e-02  3.453e+00  -0.013  0.98931   
## ManufacturingProcess35  1.044e-02  2.202e-02   0.474  0.63689   
## ManufacturingProcess36 -1.205e+02  4.011e+02  -0.300  0.76466   
## ManufacturingProcess37 -7.608e-01  3.803e-01  -2.001  0.04904 * 
## ManufacturingProcess38 -3.977e-01  3.150e-01  -1.262  0.21068   
## ManufacturingProcess39  1.779e-01  1.689e-01   1.054  0.29540   
## ManufacturingProcess40  1.039e+00  9.859e+00   0.105  0.91639   
## ManufacturingProcess41  1.908e+00  6.904e+00   0.276  0.78308   
## ManufacturingProcess42 -1.701e-01  3.265e-01  -0.521  0.60392   
## ManufacturingProcess43  2.823e-01  4.537e-01   0.622  0.53561   
## ManufacturingProcess44 -7.369e-01  1.698e+00  -0.434  0.66544   
## ManufacturingProcess45  1.167e+00  7.182e-01   1.625  0.10841   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.073 on 75 degrees of freedom
## Multiple R-squared:  0.8105, Adjusted R-squared:  0.669 
## F-statistic: 5.727 on 56 and 75 DF,  p-value: 3.719e-12

Part d

# Score the held-out rows with the fitted linear model.
test_predictors <- dplyr::select(test_set, -Yield)
lm_predict <- predict(lm_mod, newdata = test_predictors)

# Pair observed and predicted yields in the `obs`/`pred` layout that
# defaultSummary() reads, then report RMSE, R-squared and MAE.
lmValues <- data.frame(obs = test_set$Yield, pred = lm_predict)

defaultSummary(lmValues)
##      RMSE  Rsquared       MAE 
## 1.5481630 0.4581323 1.1693219

Part e

Part f