library(caret)
library(MASS)    # rlm()
library(pls)     # plsr()
library(tibble)  # as_tibble()
library(dplyr)   # %>% and select(); attached after MASS so select() is dplyr's
# Load the Tecator data set; the `absorp` (predictors) and `endpoints`
# (responses) matrices used below come from it.
data(tecator)

# Name the columns before converting. as_tibble() on a matrix without column
# names falls back to a deprecated compatibility repair and emits a lifecycle
# warning; assigning the same V1..Vn names up front yields the same tibble
# without the warning.
absorp_named <- absorp
colnames(absorp_named) <- paste0("V", seq_len(ncol(absorp_named)))
raw_data <- as_tibble(absorp_named)

endpoints_named <- endpoints
colnames(endpoints_named) <- c("water_pct", "fat_pct", "protein_pct")
response_raw <- as_tibble(endpoints_named)

# Predictors and the three responses side by side, one row per sample.
m1 <- cbind(raw_data, response_raw)
# Robust linear model for water content, tuned with 10-fold cross-validation.
# The predictors are replaced by their principal components before fitting.
set.seed(100)
ctrl <- trainControl(method = "cv", number = 10)
rlmPCA <- train(
  x = raw_data,
  y = response_raw$water_pct,
  method = "rlm",
  preProcess = "pca",
  trControl = ctrl
)
# Show the resampling results for each tuning-parameter combination.
rlmPCA
## Robust Linear Model
##
## 215 samples
## 100 predictors
##
## Pre-processing: principal component signal extraction (100), centered
## (100), scaled (100)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 193, 194, 194, 193, 193, 194, ...
## Resampling results across tuning parameters:
##
## intercept psi RMSE Rsquared MAE
## FALSE psi.huber 63.798603 0.2875412 63.232121
## FALSE psi.hampel 63.798603 0.2875412 63.232121
## FALSE psi.bisquare 63.799249 0.2875042 63.232623
## TRUE psi.huber 8.562152 0.2892638 6.707973
## TRUE psi.hampel 8.505529 0.2880885 6.859076
## TRUE psi.bisquare 8.601691 0.2888414 6.657618
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were intercept = TRUE and psi = psi.hampel.
# Hold-out split: draw 70% of the row indices (without replacement) for
# training and keep the remaining rows for testing.
set.seed(1234)
sample_set <- sample(nrow(m1), round(nrow(m1) * .70), replace = FALSE)
train_set <- m1[sample_set, ]
test_set <- m1[-sample_set, ]

# select() is always namespace-qualified here. MASS (needed for rlm() later in
# this file) also exports a select(); if MASS is attached after dplyr, a bare
# select() dispatches to MASS::select() and fails on these data frames.

# Predictor columns only.
train_predictors <- train_set %>%
  dplyr::select(-c("water_pct", "fat_pct", "protein_pct"))

# One modelling frame per response: all predictors plus that single response.
train_water_pct <- train_set %>%
  dplyr::select(-c("fat_pct", "protein_pct"))
train_fat_pct <- train_set %>%
  dplyr::select(-c("water_pct", "protein_pct"))
train_protein_pct <- train_set %>%
  dplyr::select(-c("water_pct", "fat_pct"))

# Held-out predictors; observed responses are read from test_set directly.
test_predictors <- test_set %>%
  dplyr::select(-c("water_pct", "fat_pct", "protein_pct"))
## Ordinary least squares
# One linear model per response, each regressed on every other column of its
# training frame.
ols_water_pct <- lm(formula = water_pct ~ ., data = train_water_pct)
ols_fat_pct <- lm(formula = fat_pct ~ ., data = train_fat_pct)
ols_protein_pct <- lm(formula = protein_pct ~ ., data = train_protein_pct)
# Coefficient table and fit statistics for the water model.
summary(object = ols_water_pct)
##
## Call:
## lm(formula = water_pct ~ ., data = train_water_pct)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.51720 -0.22950 -0.01484 0.28057 2.01623
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.94 2.27 32.133 < 2e-16 ***
## V1 -14126.01 4306.20 -3.280 0.001913 **
## V2 21572.27 7525.32 2.867 0.006099 **
## V3 -4114.54 10607.69 -0.388 0.699782
## V4 -6391.76 19679.57 -0.325 0.746721
## V5 682.13 26125.90 0.026 0.979276
## V6 15662.76 23914.18 0.655 0.515559
## V7 -10351.01 15373.97 -0.673 0.503931
## V8 5590.94 8800.91 0.635 0.528209
## V9 -7763.39 7411.57 -1.047 0.300023
## V10 -12825.78 10462.96 -1.226 0.226123
## V11 32480.82 13299.03 2.442 0.018246 *
## V12 -72940.72 25580.17 -2.851 0.006353 **
## V13 94376.20 34092.12 2.768 0.007933 **
## V14 -66687.32 28814.71 -2.314 0.024883 *
## V15 36885.99 18815.44 1.960 0.055646 .
## V16 -7797.56 10756.79 -0.725 0.471963
## V17 2807.19 7442.55 0.377 0.707667
## V18 -12947.80 8226.56 -1.574 0.121945
## V19 -14817.91 10997.27 -1.347 0.184044
## V20 28589.74 20483.92 1.396 0.169093
## V21 12146.59 28920.10 0.420 0.676318
## V22 -37157.84 29882.39 -1.243 0.219611
## V23 18381.06 25479.52 0.721 0.474088
## V24 6963.18 19225.21 0.362 0.718767
## V25 -11247.72 11993.03 -0.938 0.352919
## V26 -2405.94 7869.23 -0.306 0.761097
## V27 13852.13 8530.37 1.624 0.110822
## V28 -41186.99 11839.55 -3.479 0.001067 **
## V29 72554.84 18334.39 3.957 0.000244 ***
## V30 -67604.99 24895.58 -2.716 0.009114 **
## V31 36495.98 29392.14 1.242 0.220261
## V32 -7514.10 27987.02 -0.268 0.789453
## V33 -1469.09 17930.99 -0.082 0.935036
## V34 -3508.86 12582.67 -0.279 0.781522
## V35 17777.12 10342.76 1.719 0.091964 .
## V36 -13522.36 7286.12 -1.856 0.069486 .
## V37 -2896.28 8338.25 -0.347 0.729817
## V38 11459.25 9442.44 1.214 0.230722
## V39 -22388.31 13511.56 -1.657 0.103915
## V40 12555.83 18862.85 0.666 0.508764
## V41 -6981.68 21704.78 -0.322 0.749074
## V42 21110.83 23597.21 0.895 0.375357
## V43 -18956.71 21689.00 -0.874 0.386369
## V44 13751.41 15802.81 0.870 0.388441
## V45 -18255.45 15032.32 -1.214 0.230410
## V46 13173.88 10068.43 1.308 0.196831
## V47 -3061.50 5178.83 -0.591 0.557133
## V48 -5193.77 5826.80 -0.891 0.377092
## V49 16275.24 9993.91 1.629 0.109829
## V50 -4334.99 12793.35 -0.339 0.736172
## V51 -36831.85 16532.44 -2.228 0.030513 *
## V52 56945.79 22156.71 2.570 0.013257 *
## V53 -30871.04 23727.89 -1.301 0.199328
## V54 -6543.87 18961.65 -0.345 0.731488
## V55 16300.51 13878.11 1.175 0.245852
## V56 -2824.37 9374.04 -0.301 0.764463
## V57 -3702.74 7004.87 -0.529 0.599472
## V58 -2866.39 6069.43 -0.472 0.638833
## V59 11778.33 5689.27 2.070 0.043718 *
## V60 -15810.01 5210.30 -3.034 0.003850 **
## V61 8629.20 4353.34 1.982 0.053079 .
## V62 1504.13 4928.01 0.305 0.761491
## V63 -5745.43 6759.15 -0.850 0.399447
## V64 1663.66 9607.84 0.173 0.863242
## V65 6286.71 12518.61 0.502 0.617784
## V66 -672.06 13378.17 -0.050 0.960139
## V67 -20915.61 14372.67 -1.455 0.151981
## V68 37027.59 16285.47 2.274 0.027405 *
## V69 -27061.09 13518.13 -2.002 0.050854 .
## V70 10812.22 10278.85 1.052 0.298010
## V71 -3463.24 8482.15 -0.408 0.684835
## V72 5759.88 7794.93 0.739 0.463477
## V73 -10245.38 6946.98 -1.475 0.146664
## V74 -3783.16 7446.90 -0.508 0.613720
## V75 -5813.55 7271.80 -0.799 0.427880
## V76 16067.42 7539.75 2.131 0.038129 *
## V77 -12385.59 6385.10 -1.940 0.058176 .
## V78 15584.37 7304.34 2.134 0.037909 *
## V79 -3850.68 8698.88 -0.443 0.659957
## V80 13336.82 10926.82 1.221 0.228095
## V81 -13935.10 12329.02 -1.130 0.263866
## V82 8355.12 12584.25 0.664 0.509845
## V83 -2345.20 15233.42 -0.154 0.878281
## V84 -990.95 17915.65 -0.055 0.956115
## V85 -16179.00 19006.17 -0.851 0.398772
## V86 766.28 20323.69 0.038 0.970077
## V87 6334.61 21107.76 0.300 0.765364
## V88 8291.55 19041.66 0.435 0.665153
## V89 -13748.48 15431.32 -0.891 0.377312
## V90 23472.07 17705.09 1.326 0.191079
## V91 -30523.04 19298.19 -1.582 0.120163
## V92 9211.59 18082.11 0.509 0.612737
## V93 7626.65 15785.55 0.483 0.631148
## V94 -4160.73 15535.91 -0.268 0.789967
## V95 -6427.40 12329.87 -0.521 0.604515
## V96 23107.84 11580.56 1.995 0.051575 .
## V97 -18888.23 11405.05 -1.656 0.104087
## V98 20843.81 9418.31 2.213 0.031578 *
## V99 -19535.74 9671.97 -2.020 0.048887 *
## V100 3715.75 4326.44 0.859 0.394607
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.027 on 49 degrees of freedom
## Multiple R-squared: 0.9965, Adjusted R-squared: 0.9894
## F-statistic: 140.7 on 100 and 49 DF, p-value: < 2.2e-16
# Coefficient table and fit statistics for the OLS fat model.
summary(object = ols_fat_pct)
##
## Call:
## lm(formula = fat_pct ~ ., data = train_fat_pct)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.13045 -0.33450 -0.00974 0.34267 2.13637
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.834e+00 2.841e+00 1.702 0.09516 .
## V1 1.554e+04 5.389e+03 2.883 0.00583 **
## V2 -2.578e+04 9.418e+03 -2.738 0.00859 **
## V3 7.380e+03 1.327e+04 0.556 0.58076
## V4 1.363e+04 2.463e+04 0.553 0.58253
## V5 -1.136e+04 3.270e+04 -0.348 0.72968
## V6 -1.452e+04 2.993e+04 -0.485 0.62968
## V7 8.632e+03 1.924e+04 0.449 0.65565
## V8 -8.607e+02 1.101e+04 -0.078 0.93803
## V9 7.089e+03 9.275e+03 0.764 0.44836
## V10 7.855e+03 1.309e+04 0.600 0.55136
## V11 -2.830e+04 1.664e+04 -1.701 0.09537 .
## V12 7.965e+04 3.201e+04 2.488 0.01629 *
## V13 -1.050e+05 4.266e+04 -2.462 0.01739 *
## V14 7.228e+04 3.606e+04 2.004 0.05056 .
## V15 -4.002e+04 2.355e+04 -1.700 0.09551 .
## V16 1.158e+04 1.346e+04 0.860 0.39373
## V17 -3.375e+03 9.314e+03 -0.362 0.71861
## V18 1.029e+04 1.030e+04 0.999 0.32268
## V19 1.848e+04 1.376e+04 1.342 0.18564
## V20 -3.254e+04 2.563e+04 -1.270 0.21025
## V21 -1.519e+04 3.619e+04 -0.420 0.67655
## V22 4.662e+04 3.740e+04 1.247 0.21847
## V23 -2.087e+04 3.189e+04 -0.654 0.51588
## V24 -1.470e+04 2.406e+04 -0.611 0.54408
## V25 2.174e+04 1.501e+04 1.448 0.15387
## V26 -1.458e+03 9.848e+03 -0.148 0.88291
## V27 -1.914e+04 1.068e+04 -1.793 0.07914 .
## V28 4.610e+04 1.482e+04 3.112 0.00310 **
## V29 -7.955e+04 2.294e+04 -3.467 0.00111 **
## V30 8.584e+04 3.116e+04 2.755 0.00821 **
## V31 -5.645e+04 3.678e+04 -1.535 0.13128
## V32 1.777e+04 3.502e+04 0.507 0.61424
## V33 -2.888e+03 2.244e+04 -0.129 0.89813
## V34 9.301e+03 1.575e+04 0.591 0.55744
## V35 -2.385e+04 1.294e+04 -1.842 0.07147 .
## V36 1.322e+04 9.118e+03 1.450 0.15343
## V37 6.596e+03 1.043e+04 0.632 0.53025
## V38 -1.326e+04 1.182e+04 -1.122 0.26744
## V39 2.280e+04 1.691e+04 1.349 0.18368
## V40 -1.076e+04 2.361e+04 -0.456 0.65054
## V41 5.653e+03 2.716e+04 0.208 0.83601
## V42 -2.446e+04 2.953e+04 -0.828 0.41147
## V43 2.353e+04 2.714e+04 0.867 0.39023
## V44 -1.835e+04 1.978e+04 -0.928 0.35796
## V45 2.565e+04 1.881e+04 1.364 0.17895
## V46 -1.936e+04 1.260e+04 -1.536 0.13094
## V47 7.214e+03 6.481e+03 1.113 0.27109
## V48 8.194e+02 7.292e+03 0.112 0.91099
## V49 -1.108e+04 1.251e+04 -0.886 0.38010
## V50 -5.044e+02 1.601e+04 -0.032 0.97499
## V51 3.982e+04 2.069e+04 1.925 0.06009 .
## V52 -6.320e+04 2.773e+04 -2.279 0.02704 *
## V53 3.837e+04 2.969e+04 1.292 0.20231
## V54 5.108e+03 2.373e+04 0.215 0.83045
## V55 -1.769e+04 1.737e+04 -1.019 0.31329
## V56 2.556e+03 1.173e+04 0.218 0.82841
## V57 3.737e+03 8.766e+03 0.426 0.67173
## V58 2.298e+03 7.596e+03 0.303 0.76352
## V59 -1.072e+04 7.120e+03 -1.506 0.13840
## V60 1.852e+04 6.520e+03 2.840 0.00655 **
## V61 -1.330e+04 5.448e+03 -2.442 0.01825 *
## V62 4.939e+03 6.167e+03 0.801 0.42710
## V63 4.282e+03 8.459e+03 0.506 0.61493
## V64 -7.284e+03 1.202e+04 -0.606 0.54744
## V65 -5.329e+03 1.567e+04 -0.340 0.73518
## V66 7.143e+03 1.674e+04 0.427 0.67149
## V67 8.568e+03 1.799e+04 0.476 0.63594
## V68 -2.201e+04 2.038e+04 -1.080 0.28544
## V69 1.292e+04 1.692e+04 0.764 0.44870
## V70 7.238e+02 1.286e+04 0.056 0.95536
## V71 -3.565e+03 1.061e+04 -0.336 0.73840
## V72 -4.926e+03 9.755e+03 -0.505 0.61582
## V73 1.217e+04 8.694e+03 1.399 0.16799
## V74 1.556e+03 9.319e+03 0.167 0.86812
## V75 4.538e+03 9.100e+03 0.499 0.62027
## V76 -1.418e+04 9.436e+03 -1.503 0.13932
## V77 1.258e+04 7.991e+03 1.575 0.12177
## V78 -1.495e+04 9.141e+03 -1.635 0.10840
## V79 4.590e+03 1.089e+04 0.422 0.67512
## V80 -1.035e+04 1.367e+04 -0.757 0.45288
## V81 1.073e+04 1.543e+04 0.696 0.48988
## V82 -4.535e+03 1.575e+04 -0.288 0.77461
## V83 -3.900e+02 1.906e+04 -0.020 0.98376
## V84 -5.459e+03 2.242e+04 -0.243 0.80865
## V85 2.733e+04 2.379e+04 1.149 0.25610
## V86 -4.775e+03 2.543e+04 -0.188 0.85186
## V87 -1.930e+04 2.642e+04 -0.730 0.46859
## V88 1.001e+04 2.383e+04 0.420 0.67627
## V89 7.872e+03 1.931e+04 0.408 0.68533
## V90 -2.882e+04 2.216e+04 -1.301 0.19948
## V91 2.727e+04 2.415e+04 1.129 0.26441
## V92 2.244e+03 2.263e+04 0.099 0.92142
## V93 -1.642e+04 1.975e+04 -0.831 0.40979
## V94 1.592e+04 1.944e+04 0.819 0.41684
## V95 -6.812e+03 1.543e+04 -0.442 0.66079
## V96 -1.147e+04 1.449e+04 -0.791 0.43269
## V97 1.667e+04 1.427e+04 1.168 0.24841
## V98 -2.824e+04 1.179e+04 -2.396 0.02046 *
## V99 2.222e+04 1.210e+04 1.836 0.07245 .
## V100 -2.550e+03 5.414e+03 -0.471 0.63981
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.286 on 49 degrees of freedom
## Multiple R-squared: 0.9968, Adjusted R-squared: 0.9901
## F-statistic: 150.4 on 100 and 49 DF, p-value: < 2.2e-16
# Coefficient table and fit statistics for the OLS protein model.
summary(object = ols_protein_pct)
##
## Call:
## lm(formula = protein_pct ~ ., data = train_protein_pct)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41374 -0.12675 -0.02425 0.08987 0.50477
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.033e+01 7.245e-01 28.065 < 2e-16 ***
## V1 -4.602e+03 1.374e+03 -3.349 0.001568 **
## V2 7.438e+03 2.402e+03 3.097 0.003232 **
## V3 1.609e+03 3.385e+03 0.475 0.636707
## V4 -1.364e+04 6.281e+03 -2.171 0.034779 *
## V5 1.600e+04 8.338e+03 1.919 0.060824 .
## V6 -5.690e+03 7.632e+03 -0.746 0.459505
## V7 3.210e+02 4.907e+03 0.065 0.948109
## V8 -2.307e+03 2.809e+03 -0.821 0.415531
## V9 -2.588e+03 2.365e+03 -1.094 0.279336
## V10 7.192e+03 3.339e+03 2.154 0.036216 *
## V11 -3.461e+02 4.244e+03 -0.082 0.935340
## V12 -1.986e+04 8.164e+03 -2.433 0.018663 *
## V13 3.565e+04 1.088e+04 3.276 0.001935 **
## V14 -2.849e+04 9.196e+03 -3.098 0.003218 **
## V15 1.716e+04 6.005e+03 2.858 0.006249 **
## V16 -9.428e+03 3.433e+03 -2.746 0.008408 **
## V17 2.486e+03 2.375e+03 1.047 0.300330
## V18 -3.517e+03 2.626e+03 -1.340 0.186567
## V19 -5.032e+03 3.510e+03 -1.434 0.157982
## V20 9.382e+03 6.538e+03 1.435 0.157590
## V21 3.569e+03 9.230e+03 0.387 0.700635
## V22 -1.126e+04 9.537e+03 -1.181 0.243495
## V23 6.673e+03 8.132e+03 0.821 0.415831
## V24 6.186e+02 6.136e+03 0.101 0.920111
## V25 -3.107e+03 3.828e+03 -0.812 0.420881
## V26 7.881e+02 2.511e+03 0.314 0.755001
## V27 2.637e+03 2.723e+03 0.968 0.337557
## V28 -1.102e+04 3.779e+03 -2.916 0.005338 **
## V29 2.418e+04 5.851e+03 4.132 0.000140 ***
## V30 -2.385e+04 7.946e+03 -3.001 0.004219 **
## V31 1.438e+04 9.381e+03 1.533 0.131676
## V32 -6.165e+03 8.932e+03 -0.690 0.493328
## V33 3.257e+03 5.723e+03 0.569 0.571872
## V34 -5.210e+03 4.016e+03 -1.297 0.200585
## V35 5.140e+03 3.301e+03 1.557 0.125893
## V36 -8.485e+02 2.325e+03 -0.365 0.716769
## V37 -4.649e+03 2.661e+03 -1.747 0.086899 .
## V38 3.616e+03 3.014e+03 1.200 0.235890
## V39 2.401e+03 4.312e+03 0.557 0.580240
## V40 -7.018e+03 6.020e+03 -1.166 0.249353
## V41 5.901e+02 6.927e+03 0.085 0.932458
## V42 8.864e+03 7.531e+03 1.177 0.244880
## V43 -1.474e+03 6.922e+03 -0.213 0.832301
## V44 -1.031e+04 5.044e+03 -2.044 0.046316 *
## V45 8.201e+03 4.798e+03 1.709 0.093711 .
## V46 -1.481e+02 3.213e+03 -0.046 0.963437
## V47 -2.224e+03 1.653e+03 -1.346 0.184637
## V48 -2.453e+03 1.860e+03 -1.319 0.193316
## V49 5.768e+03 3.190e+03 1.808 0.076669 .
## V50 -4.037e+01 4.083e+03 -0.010 0.992150
## V51 -1.080e+04 5.276e+03 -2.047 0.046049 *
## V52 1.794e+04 7.071e+03 2.537 0.014406 *
## V53 -1.516e+04 7.573e+03 -2.002 0.050882 .
## V54 5.612e+03 6.052e+03 0.927 0.358275
## V55 3.247e+02 4.429e+03 0.073 0.941867
## V56 -1.860e+02 2.992e+03 -0.062 0.950689
## V57 -1.015e+02 2.236e+03 -0.045 0.963990
## V58 -1.162e+03 1.937e+03 -0.600 0.551439
## V59 1.707e+03 1.816e+03 0.940 0.351754
## V60 -4.068e+03 1.663e+03 -2.446 0.018073 *
## V61 5.058e+03 1.389e+03 3.640 0.000655 ***
## V62 -1.535e+03 1.573e+03 -0.976 0.333724
## V63 -5.797e+03 2.157e+03 -2.687 0.009816 **
## V64 1.291e+04 3.066e+03 4.209 0.000109 ***
## V65 -8.561e+03 3.995e+03 -2.143 0.037125 *
## V66 2.099e+03 4.270e+03 0.492 0.625211
## V67 -5.157e+03 4.587e+03 -1.124 0.266430
## V68 1.254e+04 5.198e+03 2.413 0.019627 *
## V69 -1.094e+04 4.314e+03 -2.536 0.014437 *
## V70 2.699e+03 3.281e+03 0.823 0.414714
## V71 4.588e+02 2.707e+03 0.169 0.866119
## V72 -1.382e+03 2.488e+03 -0.555 0.581082
## V73 -1.555e+03 2.217e+03 -0.701 0.486524
## V74 6.233e+03 2.377e+03 2.623 0.011592 *
## V75 -8.237e+03 2.321e+03 -3.549 0.000864 ***
## V76 1.871e+03 2.406e+03 0.778 0.440485
## V77 2.678e+03 2.038e+03 1.314 0.194976
## V78 1.400e+03 2.331e+03 0.601 0.550813
## V79 -1.534e+03 2.776e+03 -0.553 0.583048
## V80 2.944e+03 3.487e+03 0.844 0.402585
## V81 -2.657e+03 3.935e+03 -0.675 0.502773
## V82 1.507e+03 4.016e+03 0.375 0.709149
## V83 -1.177e+04 4.862e+03 -2.421 0.019219 *
## V84 2.250e+04 5.718e+03 3.936 0.000262 ***
## V85 -2.002e+04 6.066e+03 -3.300 0.001808 **
## V86 7.967e+03 6.486e+03 1.228 0.225238
## V87 -8.966e+02 6.737e+03 -0.133 0.894666
## V88 4.248e+02 6.077e+03 0.070 0.944555
## V89 -5.318e+03 4.925e+03 -1.080 0.285543
## V90 1.362e+04 5.651e+03 2.411 0.019713 *
## V91 -7.887e+03 6.159e+03 -1.280 0.206410
## V92 -3.151e+03 5.771e+03 -0.546 0.587510
## V93 1.310e+03 5.038e+03 0.260 0.795898
## V94 2.882e+03 4.958e+03 0.581 0.563686
## V95 -9.439e+02 3.935e+03 -0.240 0.811427
## V96 -2.727e+03 3.696e+03 -0.738 0.464106
## V97 1.620e+03 3.640e+03 0.445 0.658288
## V98 6.960e+03 3.006e+03 2.315 0.024820 *
## V99 -9.354e+03 3.087e+03 -3.030 0.003895 **
## V100 2.983e+03 1.381e+03 2.160 0.035663 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3279 on 49 degrees of freedom
## Multiple R-squared: 0.9962, Adjusted R-squared: 0.9885
## F-statistic: 129.3 on 100 and 49 DF, p-value: < 2.2e-16
# Score the held-out samples with each OLS model.
predict_water_pct <- predict(ols_water_pct, newdata = test_predictors)
predict_fat_pct <- predict(ols_fat_pct, newdata = test_predictors)
predict_protein_pct <- predict(ols_protein_pct, newdata = test_predictors)

# Pair each prediction with the observed value in pred/obs columns, the frame
# handed to defaultSummary().
water_pct_df <- data.frame(
  pred = predict_water_pct,
  obs = test_set$water_pct
)
fat_pct_df <- data.frame(
  pred = predict_fat_pct,
  obs = test_set$fat_pct
)
protein_pct_df <- data.frame(
  pred = predict_protein_pct,
  obs = test_set$protein_pct
)

# Test-set RMSE, R-squared and MAE for the OLS water model.
defaultSummary(data = water_pct_df)
## RMSE Rsquared MAE
## 5.5817925 0.8038929 2.4307695
# Test-set RMSE, R-squared and MAE for the OLS fat model.
defaultSummary(data = fat_pct_df)
## RMSE Rsquared MAE
## 5.8725434 0.8397015 2.8190720
# Test-set RMSE, R-squared and MAE for the OLS protein model.
defaultSummary(data = protein_pct_df)
## RMSE Rsquared MAE
## 2.5283355 0.6393083 1.2180471
## Robust linear regression (rlm)
# With rlm()'s default iteration limit all three fits stopped with
# "'rlm' failed to converge in 20 steps", so their coefficients were
# unconverged iterates. The cap is raised via `maxit`.
# NOTE(review): confirm the warning is gone after re-running; if it persists,
# raise maxit further or reduce the predictor set first. The recorded output
# that follows in this file was produced with the default cap.
rlm_water_pct <- rlm(water_pct ~ ., data = train_water_pct, maxit = 200)
rlm_fat_pct <- rlm(fat_pct ~ ., data = train_fat_pct, maxit = 200)
rlm_protein_pct <- rlm(protein_pct ~ ., data = train_protein_pct, maxit = 200)
# Coefficient table and residual scale for the robust water model.
summary(rlm_water_pct)
##
## Call: rlm(formula = water_pct ~ ., data = train_water_pct)
## Residuals:
## Min 1Q Median 3Q Max
## -3.774939 -0.079509 -0.001079 0.086589 6.123185
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 75.6648 0.7140 105.9689
## V1 -16651.5107 1354.5261 -12.2932
## V2 19756.9026 2367.1049 8.3464
## V3 -726.7800 3336.6709 -0.2178
## V4 4904.1431 6190.2499 0.7922
## V5 -12431.4383 8217.9575 -1.5127
## V6 20758.9998 7522.2560 2.7597
## V7 -3690.2476 4835.9137 -0.7631
## V8 -2134.6793 2768.3455 -0.7711
## V9 -16673.2031 2331.3251 -7.1518
## V10 -11454.8300 3291.1455 -3.4805
## V11 42967.3078 4183.2375 10.2713
## V12 -88001.1266 8046.2961 -10.9368
## V13 107145.8752 10723.7506 9.9915
## V14 -59972.1323 9063.7284 -6.6167
## V15 25594.6041 5918.4384 4.3246
## V16 -3845.7567 3383.5721 -1.1366
## V17 729.4902 2341.0689 0.3116
## V18 -9512.3917 2587.6804 -3.6760
## V19 -24528.2533 3459.2137 -7.0907
## V20 35082.0208 6443.2621 5.4448
## V21 22744.1395 9096.8787 2.5002
## V22 -58520.6402 9399.5685 -6.2259
## V23 36849.2967 8014.6372 4.5977
## V24 353.4321 6047.3315 0.0584
## V25 -14509.8651 3772.4321 -3.8463
## V26 -1884.7257 2475.2827 -0.7614
## V27 18608.3643 2683.2474 6.9350
## V28 -41468.0331 3724.1545 -11.1349
## V29 67040.7343 5767.1212 11.6246
## V30 -57496.9019 7830.9575 -7.3423
## V31 25448.6859 9245.3612 2.7526
## V32 -6115.0199 8803.3754 -0.6946
## V33 7509.9638 5640.2307 1.3315
## V34 -10184.6700 3957.9050 -2.5732
## V35 15082.1291 3253.3363 4.6359
## V36 -12130.1950 2291.8645 -5.2927
## V37 -1071.2293 2622.8134 -0.4084
## V38 13652.1068 2970.1397 4.5965
## V39 -30746.2341 4250.0906 -7.2343
## V40 24637.4638 5933.3502 4.1524
## V41 -12273.1120 6827.2853 -1.7977
## V42 20017.0059 7422.5527 2.6968
## V43 -23416.7860 6822.3226 -3.4324
## V44 25951.7895 4970.8071 5.2208
## V45 -29706.1059 4728.4470 -6.2824
## V46 15626.1195 3167.0471 4.9340
## V47 -676.3698 1629.0133 -0.4152
## V48 -4362.0040 1832.8316 -2.3799
## V49 16733.9165 3143.6071 5.3232
## V50 -13288.9255 4024.1753 -3.3023
## V51 -23401.9816 5200.3155 -4.5001
## V52 50249.3785 6969.4389 7.2100
## V53 -30881.5939 7463.6590 -4.1376
## V54 -11813.8473 5964.4263 -1.9807
## V55 28093.8811 4365.3879 6.4356
## V56 -10756.7072 2948.6233 -3.6480
## V57 -2562.2507 2203.3976 -1.1629
## V58 -629.0949 1909.1529 -0.3295
## V59 5963.1517 1789.5718 3.3322
## V60 -8619.6660 1638.9094 -5.2594
## V61 3905.7132 1369.3534 2.8522
## V62 2421.5447 1550.1155 1.5622
## V63 2856.0685 2126.1034 1.3433
## V64 -11180.5338 3022.1654 -3.6995
## V65 13796.1819 3937.7543 3.5036
## V66 -8032.5367 4208.1306 -1.9088
## V67 -10653.3489 4520.9534 -2.3564
## V68 25665.8333 5122.6286 5.0103
## V69 -20031.0515 4252.1558 -4.7108
## V70 7389.0990 3233.2330 2.2854
## V71 129.7586 2668.0797 0.0486
## V72 807.4423 2451.9102 0.3293
## V73 -3223.3206 2185.1885 -1.4751
## V74 -12915.3837 2342.4388 -5.5136
## V75 3920.3769 2287.3608 1.7139
## V76 16147.6912 2371.6436 6.8086
## V77 -12840.5349 2008.4477 -6.3933
## V78 10918.3146 2297.5941 4.7521
## V79 966.8102 2736.2514 0.3533
## V80 6950.4730 3437.0558 2.0222
## V81 -16038.6069 3878.1207 -4.1357
## V82 13602.9420 3958.4040 3.4365
## V83 -1430.9818 4791.7061 -0.2986
## V84 3034.1630 5635.4056 0.5384
## V85 -34168.4026 5978.4310 -5.7153
## V86 19795.1822 6392.8584 3.0965
## V87 2893.4291 6639.4922 0.4358
## V88 -1680.0729 5989.5961 -0.2805
## V89 -3918.0770 4853.9541 -0.8072
## V90 27421.4799 5569.1745 4.9238
## V91 -48216.9897 6070.2866 -7.9431
## V92 24232.9683 5687.7668 4.2605
## V93 4757.1261 4965.3790 0.9581
## V94 -3029.1162 4886.8525 -0.6199
## V95 -17244.1026 3878.3865 -4.4462
## V96 35567.9132 3642.6885 9.7642
## V97 -22523.9209 3587.4830 -6.2785
## V98 16070.5547 2962.5490 5.4246
## V99 -10297.0300 3042.3401 -3.3846
## V100 -1194.0365 1360.8910 -0.8774
##
## Residual standard error: 0.1295 on 49 degrees of freedom
# Coefficient table and residual scale for the robust fat model.
summary(object = rlm_fat_pct)
##
## Call: rlm(formula = fat_pct ~ ., data = train_fat_pct)
## Residuals:
## Min 1Q Median 3Q Max
## -5.488239 -0.098107 0.003275 0.085600 3.900130
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 2.7564 0.7938 3.4723
## V1 15203.6026 1505.9377 10.0958
## V2 -20735.7829 2631.7045 -7.8792
## V3 6728.7757 3709.6504 1.8139
## V4 -13276.6783 6882.2081 -1.9291
## V5 30788.4999 9136.5767 3.3698
## V6 -42734.4216 8363.1083 -5.1099
## V7 13052.0725 5376.4815 2.4276
## V8 1457.8086 3077.7965 0.4737
## V9 19554.5409 2591.9252 7.5444
## V10 899.0792 3659.0361 0.2457
## V11 -35881.7235 4650.8479 -7.7151
## V12 80985.6648 8945.7267 9.0530
## V13 -85437.9752 11922.4723 -7.1661
## V14 36065.9392 10076.8895 3.5791
## V15 -10001.0450 6580.0130 -1.5199
## V16 -1833.2999 3761.7944 -0.4873
## V17 963.5837 2602.7581 0.3702
## V18 6820.9910 2876.9364 2.3709
## V19 28210.6242 3845.8913 7.3353
## V20 -55889.6336 7163.5024 -7.8020
## V21 10163.3575 10113.7454 1.0049
## V22 36869.0143 10450.2705 3.5280
## V23 -18419.2292 8910.5288 -2.0671
## V24 -26544.5633 6723.3139 -3.9481
## V25 34845.6683 4194.1219 8.3082
## V26 -3872.4570 2751.9746 -1.4072
## V27 -15056.1707 2983.1859 -5.0470
## V28 32484.3155 4140.4477 7.8456
## V29 -65592.0433 6411.7812 -10.2299
## V30 77470.8564 8706.3171 8.8982
## V31 -63380.0748 10278.8256 -6.1661
## V32 43129.3115 9787.4338 4.4066
## V33 -26615.1388 6270.7066 -4.2444
## V34 15818.6065 4400.3273 3.5949
## V35 -18631.7301 3617.0006 -5.1512
## V36 15630.2568 2548.0536 6.1342
## V37 -6808.5657 2915.9966 -2.3349
## V38 -6496.4275 3302.1477 -1.9673
## V39 27915.9003 4725.1740 5.9079
## V40 -17197.0519 6596.5915 -2.6070
## V41 19.5110 7590.4525 0.0026
## V42 -7103.8709 8252.2600 -0.8608
## V43 12969.0095 7584.9350 1.7098
## V44 -24986.0932 5526.4536 -4.5212
## V45 37176.7876 5257.0020 7.0719
## V46 -20336.5264 3521.0658 -5.7757
## V47 827.0865 1811.1076 0.4567
## V48 3419.6397 2037.7091 1.6782
## V49 -8705.3660 3495.0056 -2.4908
## V50 -2142.2212 4474.0054 -0.4788
## V51 33706.4861 5781.6168 5.8299
## V52 -47837.2928 7748.4963 -6.1738
## V53 19056.8363 8297.9613 2.2966
## V54 25433.1891 6631.1415 3.8354
## V55 -35400.5797 4853.3595 -7.2940
## V56 10736.4548 3278.2261 3.2751
## V57 4478.0854 2449.6977 1.8280
## V58 -1063.9639 2122.5617 -0.5013
## V59 -6958.1297 1989.6136 -3.4972
## V60 15987.1013 1822.1099 8.7739
## V61 -12964.1987 1522.4224 -8.5155
## V62 3880.4975 1723.3904 2.2517
## V63 -54.1518 2363.7634 -0.0229
## V64 5094.9229 3359.9889 1.5164
## V65 -20817.7327 4377.9241 -4.7552
## V66 24897.5140 4678.5236 5.3217
## V67 -5261.3470 5026.3143 -1.0468
## V68 -11356.0222 5695.2459 -1.9939
## V69 10494.5444 4727.4700 2.2199
## V70 -4159.1651 3594.6500 -1.1570
## V71 -6530.9047 2966.3228 -2.2017
## V72 2753.2320 2725.9895 1.0100
## V73 5386.8897 2429.4531 2.2173
## V74 10296.0721 2604.2812 3.9535
## V75 -4862.4451 2543.0464 -1.9121
## V76 -14517.4743 2636.7505 -5.5058
## V77 12811.2136 2232.9559 5.7373
## V78 -6400.9430 2554.4236 -2.5058
## V79 -2992.1594 3042.1149 -0.9836
## V80 -3203.9074 3821.2566 -0.8384
## V81 11934.0844 4311.6246 2.7679
## V82 -12155.7105 4400.8821 -2.7621
## V83 -1511.3247 5327.3323 -0.2837
## V84 -1539.0641 6265.3422 -0.2456
## V85 29000.7190 6646.7116 4.3632
## V86 -3026.2618 7107.4645 -0.4258
## V87 -28669.0795 7381.6675 -3.8838
## V88 18057.3838 6659.1248 2.7117
## V89 4312.9308 5396.5384 0.7992
## V90 -27267.7762 6191.7076 -4.4039
## V91 27508.5439 6748.8350 4.0760
## V92 3363.2831 6323.5564 0.5319
## V93 -22382.7984 5520.4187 -4.0545
## V94 17700.6674 5433.1144 3.2579
## V95 4866.7088 4311.9201 1.1287
## V96 -21861.9836 4049.8752 -5.3982
## V97 17159.9242 3988.4988 4.3024
## V98 -26122.5240 3293.7084 -7.9310
## V99 16755.2238 3382.4187 4.9536
## V100 1458.1694 1513.0140 0.9638
##
## Residual standard error: 0.145 on 49 degrees of freedom
# Coefficient table and residual scale for the robust protein model.
summary(object = rlm_protein_pct)
##
## Call: rlm(formula = protein_pct ~ ., data = train_protein_pct)
## Residuals:
## Min 1Q Median 3Q Max
## -0.6527513 -0.0194684 0.0002333 0.0187142 0.9909981
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 19.9637 0.1756 113.6592
## V1 -3572.2509 333.2018 -10.7210
## V2 6377.5864 582.2875 10.9526
## V3 4308.6788 820.7924 5.2494
## V4 -19430.6855 1522.7484 -12.7603
## V5 19499.4585 2021.5471 9.6458
## V6 -7424.4566 1850.4105 -4.0123
## V7 455.8161 1189.5933 0.3832
## V8 -609.9403 680.9893 -0.8957
## V9 -3961.2217 573.4860 -6.9073
## V10 10268.0924 809.5936 12.6830
## V11 -4043.6133 1029.0406 -3.9295
## V12 -15639.9764 1979.3199 -7.9017
## V13 29781.4118 2637.9508 11.2896
## V14 -21959.1050 2229.5995 -9.8489
## V15 13481.8603 1455.8851 9.2602
## V16 -9706.1418 832.3297 -11.6614
## V17 3830.8935 575.8829 6.6522
## V18 -3835.5255 636.5472 -6.0255
## V19 -7831.0838 850.9369 -9.2029
## V20 12170.1981 1584.9873 7.6784
## V21 3100.6753 2237.7542 1.3856
## V22 -11173.8829 2312.2133 -4.8325
## V23 5584.5095 1971.5321 2.8326
## V24 2531.6748 1487.5917 1.7019
## V25 -2628.5651 927.9860 -2.8325
## V26 -1725.4393 608.8983 -2.8337
## V27 3112.1588 660.0559 4.7150
## V28 -9890.3379 916.1101 -10.7960
## V29 23059.1968 1418.6624 16.2542
## V30 -22657.6384 1926.3484 -11.7620
## V31 15318.3099 2274.2796 6.7355
## V32 -7869.8492 2165.5549 -3.6341
## V33 2657.0031 1387.4484 1.9150
## V34 -3053.3140 973.6107 -3.1361
## V35 2569.6212 800.2929 3.2109
## V36 1471.1660 563.7790 2.6095
## V37 -6834.0072 645.1896 -10.5922
## V38 4828.2375 730.6289 6.6083
## V39 3409.0156 1045.4859 3.2607
## V40 -8020.5628 1459.5533 -5.4952
## V41 -2957.1634 1679.4537 -1.7608
## V42 16874.9460 1825.8843 9.2421
## V43 -8324.0999 1678.2329 -4.9600
## V44 -8264.5309 1222.7760 -6.7588
## V45 9190.5033 1163.1574 7.9013
## V46 -879.2156 779.0665 -1.1286
## V47 -2414.6639 400.7233 -6.0258
## V48 -2197.4363 450.8609 -4.8739
## V49 7126.0130 773.3004 9.2151
## V50 -3486.1909 989.9126 -3.5217
## V51 -7819.6328 1279.2330 -6.1128
## V52 16553.1819 1714.4222 9.6553
## V53 -14375.6972 1835.9962 -7.8299
## V54 5246.9341 1467.1978 3.5762
## V55 8.4113 1073.8480 0.0078
## V56 379.2141 725.3361 0.5228
## V57 -349.0652 542.0169 -0.6440
## V58 -1169.4001 469.6353 -2.4900
## V59 1708.6936 440.2193 3.8815
## V60 -4149.8248 403.1577 -10.2933
## V61 4977.9459 336.8492 14.7780
## V62 -1190.2825 381.3151 -3.1215
## V63 -6131.3943 523.0032 -11.7234
## V64 12931.0782 743.4268 17.3939
## V65 -6409.6773 968.6538 -6.6171
## V66 -1073.8322 1035.1641 -1.0374
## V67 -4361.0838 1112.1158 -3.9214
## V68 14452.7167 1260.1227 11.4693
## V69 -14114.4518 1045.9939 -13.4938
## V70 4071.6629 795.3476 5.1194
## V71 1217.9592 656.3247 1.8557
## V72 -1891.0509 603.1489 -3.1353
## V73 -2753.8079 537.5376 -5.1230
## V74 8342.2325 576.2199 14.4775
## V75 -10054.3615 562.6712 -17.8690
## V76 1931.6477 583.4040 3.3110
## V77 5012.9223 494.0609 10.1464
## V78 -100.4332 565.1885 -0.1777
## V79 -1100.8705 673.0944 -1.6355
## V80 3150.6323 845.4863 3.7264
## V81 -3999.0107 953.9845 -4.1919
## V82 4258.3624 973.7335 4.3732
## V83 -14564.5429 1178.7186 -12.3563
## V84 23020.1281 1386.2615 16.6059
## V85 -18515.9322 1470.6428 -12.5904
## V86 6176.9896 1572.5884 3.9279
## V87 -3474.4340 1633.2582 -2.1273
## V88 7095.9956 1473.3893 4.8161
## V89 -10033.8669 1194.0311 -8.4034
## V90 12835.1409 1369.9692 9.3689
## V91 -5268.5839 1493.2385 -3.5283
## V92 -3855.2170 1399.1419 -2.7554
## V93 483.7073 1221.4407 0.3960
## V94 5094.5789 1202.1238 4.2380
## V95 -3363.8981 954.0498 -3.5259
## V96 -1242.5713 896.0701 -1.3867
## V97 2042.5253 882.4901 2.3145
## V98 6111.3313 728.7617 8.3859
## V99 -9401.6209 748.3896 -12.5625
## V100 3043.3156 334.7675 9.0908
##
## Residual standard error: 0.0296 on 49 degrees of freedom
# Score the held-out samples with each robust model. This reuses (and
# overwrites) the prediction and pred/obs objects created for the OLS models.
predict_water_pct <- predict(rlm_water_pct, newdata = test_predictors)
predict_fat_pct <- predict(rlm_fat_pct, newdata = test_predictors)
predict_protein_pct <- predict(rlm_protein_pct, newdata = test_predictors)

water_pct_df <- data.frame(
  pred = predict_water_pct,
  obs = test_set$water_pct
)
fat_pct_df <- data.frame(
  pred = predict_fat_pct,
  obs = test_set$fat_pct
)
protein_pct_df <- data.frame(
  pred = predict_protein_pct,
  obs = test_set$protein_pct
)

# Test-set RMSE, R-squared and MAE for the robust water model.
defaultSummary(data = water_pct_df)
## RMSE Rsquared MAE
## 6.4078059 0.7595091 2.9757493
# Test-set RMSE, R-squared and MAE for the robust fat model.
defaultSummary(data = fat_pct_df)
## RMSE Rsquared MAE
## 6.0493218 0.8249204 3.1560538
# Test-set RMSE, R-squared and MAE for the robust protein model.
defaultSummary(data = protein_pct_df)
## RMSE Rsquared MAE
## 2.589344 0.628458 1.352753
## Partial least squares
# One PLS regression per response. No ncomp is supplied; the recorded summary
# reports 100 components considered.
pls_water_pct <- plsr(formula = water_pct ~ ., data = train_water_pct)
pls_fat_pct <- plsr(formula = fat_pct ~ ., data = train_fat_pct)
pls_protein_pct <- plsr(formula = protein_pct ~ ., data = train_protein_pct)
# Cumulative % variance explained in X and in water_pct per component count.
summary(object = pls_water_pct)
## Data: X dimension: 150 100
## Y dimension: 150 1
## Fit method: kernelpls
## Number of components considered: 100
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps
## X 98.58 98.97 99.77 99.99 100.00 100.00 100.0
## water_pct 17.55 78.57 84.45 89.68 93.07 93.63 93.9
## 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps 14 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## water_pct 94.27 95.37 95.55 95.61 95.95 96.46 97.26
## 15 comps 16 comps 17 comps 18 comps 19 comps 20 comps 21 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## water_pct 97.51 97.95 98.22 98.41 98.55 98.67 98.73
## 22 comps 23 comps 24 comps 25 comps 26 comps 27 comps 28 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## water_pct 98.83 98.94 99.01 99.07 99.11 99.14 99.19
## 29 comps 30 comps 31 comps 32 comps 33 comps 34 comps 35 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.0
## water_pct 99.23 99.27 99.31 99.33 99.35 99.37 99.4
## 36 comps 37 comps 38 comps 39 comps 40 comps 41 comps 42 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## water_pct 99.42 99.44 99.46 99.49 99.51 99.52 99.54
## 43 comps 44 comps 45 comps 46 comps 47 comps 48 comps 49 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.0
## water_pct 99.55 99.56 99.57 99.58 99.59 99.59 99.6
## 50 comps 51 comps 52 comps 53 comps 54 comps 55 comps 56 comps
## X 100.0 100.00 100.00 100.00 100.00 100.00 100.00
## water_pct 99.6 99.61 99.61 99.62 99.62 99.63 99.63
## 57 comps 58 comps 59 comps 60 comps 61 comps 62 comps 63 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## water_pct 99.63 99.64 99.64 99.64 99.64 99.64 99.65
## 64 comps 65 comps 66 comps 67 comps 68 comps 69 comps 70 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## water_pct 99.65 99.65 99.65 99.65 99.65 99.65 99.65
## 71 comps 72 comps 73 comps 74 comps 75 comps 76 comps 77 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## water_pct 99.65 99.65 99.65 99.65 99.65 99.65 99.65
## 78 comps 79 comps 80 comps 81 comps 82 comps 83 comps 84 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## water_pct 99.65 99.65 99.65 99.65 99.65 99.65 99.65
## 85 comps 86 comps 87 comps 88 comps 89 comps 90 comps 91 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## water_pct 99.65 99.65 99.65 99.65 99.65 99.65 99.65
## 92 comps 93 comps 94 comps 95 comps 96 comps 97 comps 98 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## water_pct 99.65 99.65 99.65 99.65 99.65 99.65 99.65
## 99 comps 100 comps
## X 100.00 100.00
## water_pct 99.65 99.65
# Report the cumulative % variance explained (in X and in fat_pct) for the
# fat_pct PLS fit as components are added.
summary(pls_fat_pct)
## Data: X dimension: 150 100
## Y dimension: 150 1
## Fit method: kernelpls
## Number of components considered: 100
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 98.58 98.97 99.76 99.99 100.00 100.00 100.00 100.00
## fat_pct 14.56 76.75 83.97 90.20 94.71 95.23 95.42 95.77
## 9 comps 10 comps 11 comps 12 comps 13 comps 14 comps 15 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## fat_pct 95.93 96.26 96.36 97.43 97.66 97.84 97.95
## 16 comps 17 comps 18 comps 19 comps 20 comps 21 comps 22 comps
## X 100.0 100.00 100.00 100.00 100.00 100.00 100.00
## fat_pct 98.2 98.49 98.64 98.75 98.84 98.91 99.01
## 23 comps 24 comps 25 comps 26 comps 27 comps 28 comps 29 comps
## X 100.00 100.00 100.0 100.00 100.00 100.0 100.00
## fat_pct 99.09 99.17 99.2 99.24 99.27 99.3 99.34
## 30 comps 31 comps 32 comps 33 comps 34 comps 35 comps 36 comps
## X 100.00 100.0 100.00 100.00 100.00 100.00 100.0
## fat_pct 99.37 99.4 99.43 99.44 99.46 99.48 99.5
## 37 comps 38 comps 39 comps 40 comps 41 comps 42 comps 43 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.0
## fat_pct 99.52 99.53 99.55 99.56 99.57 99.59 99.6
## 44 comps 45 comps 46 comps 47 comps 48 comps 49 comps 50 comps
## X 100.0 100.00 100.00 100.00 100.00 100.00 100.00
## fat_pct 99.6 99.61 99.62 99.63 99.63 99.63 99.64
## 51 comps 52 comps 53 comps 54 comps 55 comps 56 comps 57 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## fat_pct 99.64 99.64 99.65 99.65 99.65 99.66 99.66
## 58 comps 59 comps 60 comps 61 comps 62 comps 63 comps 64 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## fat_pct 99.66 99.66 99.67 99.67 99.67 99.67 99.67
## 65 comps 66 comps 67 comps 68 comps 69 comps 70 comps 71 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## fat_pct 99.67 99.67 99.67 99.67 99.67 99.67 99.67
## 72 comps 73 comps 74 comps 75 comps 76 comps 77 comps 78 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## fat_pct 99.67 99.68 99.68 99.68 99.68 99.68 99.68
## 79 comps 80 comps 81 comps 82 comps 83 comps 84 comps 85 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## fat_pct 99.68 99.68 99.68 99.68 99.68 99.68 99.68
## 86 comps 87 comps 88 comps 89 comps 90 comps 91 comps 92 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## fat_pct 99.68 99.68 99.68 99.68 99.68 99.68 99.68
## 93 comps 94 comps 95 comps 96 comps 97 comps 98 comps 99 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## fat_pct 99.68 99.68 99.68 99.68 99.68 99.68 99.68
## 100 comps
## X 100.00
## fat_pct 99.68
# Report the cumulative % variance explained (in X and in protein_pct) for the
# protein_pct PLS fit as components are added.
summary(pls_protein_pct)
## Data: X dimension: 150 100
## Y dimension: 150 1
## Fit method: kernelpls
## Number of components considered: 100
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps
## X 98.557 99.29 99.80 99.99 100.00 100.00 100.00
## protein_pct 4.948 46.20 64.43 70.92 85.84 88.09 89.66
## 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps 14 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 91.56 92.02 93.35 95.24 96.03 96.21 96.55
## 15 comps 16 comps 17 comps 18 comps 19 comps 20 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 97.02 97.27 97.52 97.65 97.87 97.96
## 21 comps 22 comps 23 comps 24 comps 25 comps 26 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 98.04 98.15 98.33 98.45 98.51 98.61
## 27 comps 28 comps 29 comps 30 comps 31 comps 32 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 98.72 98.88 98.96 99.01 99.06 99.12
## 33 comps 34 comps 35 comps 36 comps 37 comps 38 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 99.17 99.22 99.25 99.28 99.32 99.36
## 39 comps 40 comps 41 comps 42 comps 43 comps 44 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 99.38 99.41 99.43 99.44 99.46 99.47
## 45 comps 46 comps 47 comps 48 comps 49 comps 50 comps
## X 100.00 100.0 100.00 100.00 100.00 100.00
## protein_pct 99.48 99.5 99.51 99.52 99.53 99.54
## 51 comps 52 comps 53 comps 54 comps 55 comps 56 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 99.54 99.55 99.56 99.56 99.57 99.58
## 57 comps 58 comps 59 comps 60 comps 61 comps 62 comps
## X 100.00 100.00 100.00 100.0 100.0 100.00
## protein_pct 99.58 99.59 99.59 99.6 99.6 99.61
## 63 comps 64 comps 65 comps 66 comps 67 comps 68 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 99.61 99.62 99.62 99.62 99.62 99.62
## 69 comps 70 comps 71 comps 72 comps 73 comps 74 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 99.62 99.62 99.62 99.62 99.62 99.62
## 75 comps 76 comps 77 comps 78 comps 79 comps 80 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 99.62 99.62 99.62 99.62 99.62 99.62
## 81 comps 82 comps 83 comps 84 comps 85 comps 86 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 99.62 99.62 99.62 99.62 99.62 99.62
## 87 comps 88 comps 89 comps 90 comps 91 comps 92 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 99.62 99.62 99.62 99.62 99.62 99.62
## 93 comps 94 comps 95 comps 96 comps 97 comps 98 comps
## X 100.00 100.00 100.00 100.00 100.00 100.00
## protein_pct 99.62 99.62 99.62 99.62 99.62 99.62
## 99 comps 100 comps
## X 100.00 100.00
## protein_pct 99.62 99.62
# Predict each response for the first five test rows, using the PLS models
# with one and with two components (ncomp = 1:2).
# NOTE(review): only five test rows are scored here, whereas the ridge
# predictions further down score the whole test set -- confirm this is
# intended before comparing the two approaches.
predict_water_pct <- predict(
  pls_water_pct,
  newdata = test_predictors[1:5, ],
  ncomp = 1:2
)
predict_fat_pct <- predict(
  pls_fat_pct,
  newdata = test_predictors[1:5, ],
  ncomp = 1:2
)
predict_protein_pct <- predict(
  pls_protein_pct,
  newdata = test_predictors[1:5, ],
  ncomp = 1:2
)
## Penalized Regression Models
# Fit one enet model per response on the training predictors, with the
# quadratic (ridge) penalty fixed at lambda = 0.001.
ridge_water_pct <- enet(
  x = as.matrix(train_predictors),
  y = train_set$water_pct,
  lambda = 0.001
)
ridge_fat_pct <- enet(
  x = as.matrix(train_predictors),
  y = train_set$fat_pct,
  lambda = 0.001
)
ridge_protein_pct <- enet(
  x = as.matrix(train_predictors),
  y = train_set$protein_pct,
  lambda = 0.001
)

# Score the full test set. s = 1 with mode = "fraction" selects the end of the
# solution path; presumably this leaves only the ridge penalty active --
# confirm against the elasticnet documentation.
water_pct_predict <- predict(
  ridge_water_pct,
  newx = as.matrix(test_predictors),
  s = 1,
  mode = "fraction",
  type = "fit"
)
fat_pct_predict <- predict(
  ridge_fat_pct,
  newx = as.matrix(test_predictors),
  s = 1,
  mode = "fraction",
  type = "fit"
)
protein_pct_predict <- predict(
  ridge_protein_pct,
  newx = as.matrix(test_predictors),
  s = 1,
  mode = "fraction",
  type = "fit"
)
# Load the ChemicalManufacturingProcess data set into the workspace.
data("ChemicalManufacturingProcess")
# Per-column five-number summaries; the NA's rows show which columns have
# missing values and how many.
summary(ChemicalManufacturingProcess)
## Yield BiologicalMaterial01 BiologicalMaterial02 BiologicalMaterial03
## Min. :35.25 Min. :4.580 Min. :46.87 Min. :56.97
## 1st Qu.:38.75 1st Qu.:5.978 1st Qu.:52.68 1st Qu.:64.98
## Median :39.97 Median :6.305 Median :55.09 Median :67.22
## Mean :40.18 Mean :6.411 Mean :55.69 Mean :67.70
## 3rd Qu.:41.48 3rd Qu.:6.870 3rd Qu.:58.74 3rd Qu.:70.43
## Max. :46.34 Max. :8.810 Max. :64.75 Max. :78.25
##
## BiologicalMaterial04 BiologicalMaterial05 BiologicalMaterial06
## Min. : 9.38 Min. :13.24 Min. :40.60
## 1st Qu.:11.24 1st Qu.:17.23 1st Qu.:46.05
## Median :12.10 Median :18.49 Median :48.46
## Mean :12.35 Mean :18.60 Mean :48.91
## 3rd Qu.:13.22 3rd Qu.:19.90 3rd Qu.:51.34
## Max. :23.09 Max. :24.85 Max. :59.38
##
## BiologicalMaterial07 BiologicalMaterial08 BiologicalMaterial09
## Min. :100.0 Min. :15.88 Min. :11.44
## 1st Qu.:100.0 1st Qu.:17.06 1st Qu.:12.60
## Median :100.0 Median :17.51 Median :12.84
## Mean :100.0 Mean :17.49 Mean :12.85
## 3rd Qu.:100.0 3rd Qu.:17.88 3rd Qu.:13.13
## Max. :100.8 Max. :19.14 Max. :14.08
##
## BiologicalMaterial10 BiologicalMaterial11 BiologicalMaterial12
## Min. :1.770 Min. :135.8 Min. :18.35
## 1st Qu.:2.460 1st Qu.:143.8 1st Qu.:19.73
## Median :2.710 Median :146.1 Median :20.12
## Mean :2.801 Mean :147.0 Mean :20.20
## 3rd Qu.:2.990 3rd Qu.:149.6 3rd Qu.:20.75
## Max. :6.870 Max. :158.7 Max. :22.21
##
## ManufacturingProcess01 ManufacturingProcess02 ManufacturingProcess03
## Min. : 0.00 Min. : 0.00 Min. :1.47
## 1st Qu.:10.80 1st Qu.:19.30 1st Qu.:1.53
## Median :11.40 Median :21.00 Median :1.54
## Mean :11.21 Mean :16.68 Mean :1.54
## 3rd Qu.:12.15 3rd Qu.:21.50 3rd Qu.:1.55
## Max. :14.10 Max. :22.50 Max. :1.60
## NA's :1 NA's :3 NA's :15
## ManufacturingProcess04 ManufacturingProcess05 ManufacturingProcess06
## Min. :911.0 Min. : 923.0 Min. :203.0
## 1st Qu.:928.0 1st Qu.: 986.8 1st Qu.:205.7
## Median :934.0 Median : 999.2 Median :206.8
## Mean :931.9 Mean :1001.7 Mean :207.4
## 3rd Qu.:936.0 3rd Qu.:1008.9 3rd Qu.:208.7
## Max. :946.0 Max. :1175.3 Max. :227.4
## NA's :1 NA's :1 NA's :2
## ManufacturingProcess07 ManufacturingProcess08 ManufacturingProcess09
## Min. :177.0 Min. :177.0 Min. :38.89
## 1st Qu.:177.0 1st Qu.:177.0 1st Qu.:44.89
## Median :177.0 Median :178.0 Median :45.73
## Mean :177.5 Mean :177.6 Mean :45.66
## 3rd Qu.:178.0 3rd Qu.:178.0 3rd Qu.:46.52
## Max. :178.0 Max. :178.0 Max. :49.36
## NA's :1 NA's :1
## ManufacturingProcess10 ManufacturingProcess11 ManufacturingProcess12
## Min. : 7.500 Min. : 7.500 Min. : 0.0
## 1st Qu.: 8.700 1st Qu.: 9.000 1st Qu.: 0.0
## Median : 9.100 Median : 9.400 Median : 0.0
## Mean : 9.179 Mean : 9.386 Mean : 857.8
## 3rd Qu.: 9.550 3rd Qu.: 9.900 3rd Qu.: 0.0
## Max. :11.600 Max. :11.500 Max. :4549.0
## NA's :9 NA's :10 NA's :1
## ManufacturingProcess13 ManufacturingProcess14 ManufacturingProcess15
## Min. :32.10 Min. :4701 Min. :5904
## 1st Qu.:33.90 1st Qu.:4828 1st Qu.:6010
## Median :34.60 Median :4856 Median :6032
## Mean :34.51 Mean :4854 Mean :6039
## 3rd Qu.:35.20 3rd Qu.:4882 3rd Qu.:6061
## Max. :38.60 Max. :5055 Max. :6233
## NA's :1
## ManufacturingProcess16 ManufacturingProcess17 ManufacturingProcess18
## Min. : 0 Min. :31.30 Min. : 0
## 1st Qu.:4561 1st Qu.:33.50 1st Qu.:4813
## Median :4588 Median :34.40 Median :4835
## Mean :4566 Mean :34.34 Mean :4810
## 3rd Qu.:4619 3rd Qu.:35.10 3rd Qu.:4862
## Max. :4852 Max. :40.00 Max. :4971
##
## ManufacturingProcess19 ManufacturingProcess20 ManufacturingProcess21
## Min. :5890 Min. : 0 Min. :-1.8000
## 1st Qu.:6001 1st Qu.:4553 1st Qu.:-0.6000
## Median :6022 Median :4582 Median :-0.3000
## Mean :6028 Mean :4556 Mean :-0.1642
## 3rd Qu.:6050 3rd Qu.:4610 3rd Qu.: 0.0000
## Max. :6146 Max. :4759 Max. : 3.6000
##
## ManufacturingProcess22 ManufacturingProcess23 ManufacturingProcess24
## Min. : 0.000 Min. :0.000 Min. : 0.000
## 1st Qu.: 3.000 1st Qu.:2.000 1st Qu.: 4.000
## Median : 5.000 Median :3.000 Median : 8.000
## Mean : 5.406 Mean :3.017 Mean : 8.834
## 3rd Qu.: 8.000 3rd Qu.:4.000 3rd Qu.:14.000
## Max. :12.000 Max. :6.000 Max. :23.000
## NA's :1 NA's :1 NA's :1
## ManufacturingProcess25 ManufacturingProcess26 ManufacturingProcess27
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.:4832 1st Qu.:6020 1st Qu.:4560
## Median :4855 Median :6047 Median :4587
## Mean :4828 Mean :6016 Mean :4563
## 3rd Qu.:4877 3rd Qu.:6070 3rd Qu.:4609
## Max. :4990 Max. :6161 Max. :4710
## NA's :5 NA's :5 NA's :5
## ManufacturingProcess28 ManufacturingProcess29 ManufacturingProcess30
## Min. : 0.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.:19.70 1st Qu.: 8.800
## Median :10.400 Median :19.90 Median : 9.100
## Mean : 6.592 Mean :20.01 Mean : 9.161
## 3rd Qu.:10.750 3rd Qu.:20.40 3rd Qu.: 9.700
## Max. :11.500 Max. :22.00 Max. :11.200
## NA's :5 NA's :5 NA's :5
## ManufacturingProcess31 ManufacturingProcess32 ManufacturingProcess33
## Min. : 0.00 Min. :143.0 Min. :56.00
## 1st Qu.:70.10 1st Qu.:155.0 1st Qu.:62.00
## Median :70.80 Median :158.0 Median :64.00
## Mean :70.18 Mean :158.5 Mean :63.54
## 3rd Qu.:71.40 3rd Qu.:162.0 3rd Qu.:65.00
## Max. :72.50 Max. :173.0 Max. :70.00
## NA's :5 NA's :5
## ManufacturingProcess34 ManufacturingProcess35 ManufacturingProcess36
## Min. :2.300 Min. :463.0 Min. :0.01700
## 1st Qu.:2.500 1st Qu.:490.0 1st Qu.:0.01900
## Median :2.500 Median :495.0 Median :0.02000
## Mean :2.494 Mean :495.6 Mean :0.01957
## 3rd Qu.:2.500 3rd Qu.:501.5 3rd Qu.:0.02000
## Max. :2.600 Max. :522.0 Max. :0.02200
## NA's :5 NA's :5 NA's :5
## ManufacturingProcess37 ManufacturingProcess38 ManufacturingProcess39
## Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.700 1st Qu.:2.000 1st Qu.:7.100
## Median :1.000 Median :3.000 Median :7.200
## Mean :1.014 Mean :2.534 Mean :6.851
## 3rd Qu.:1.300 3rd Qu.:3.000 3rd Qu.:7.300
## Max. :2.300 Max. :3.000 Max. :7.500
##
## ManufacturingProcess40 ManufacturingProcess41 ManufacturingProcess42
## Min. :0.00000 Min. :0.00000 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:11.40
## Median :0.00000 Median :0.00000 Median :11.60
## Mean :0.01771 Mean :0.02371 Mean :11.21
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:11.70
## Max. :0.10000 Max. :0.20000 Max. :12.10
## NA's :1 NA's :1
## ManufacturingProcess43 ManufacturingProcess44 ManufacturingProcess45
## Min. : 0.0000 Min. :0.000 Min. :0.000
## 1st Qu.: 0.6000 1st Qu.:1.800 1st Qu.:2.100
## Median : 0.8000 Median :1.900 Median :2.200
## Mean : 0.9119 Mean :1.805 Mean :2.138
## 3rd Qu.: 1.0250 3rd Qu.:1.900 3rd Qu.:2.300
## Max. :11.0000 Max. :2.100 Max. :2.600
##
# Compact column-by-column view: dimensions, column types and leading values.
glimpse(ChemicalManufacturingProcess)
## Rows: 176
## Columns: 58
## $ Yield <dbl> 38.00, 42.44, 42.03, 41.42, 42.49, 43.57, 43.12…
## $ BiologicalMaterial01 <dbl> 6.25, 8.01, 8.01, 8.01, 7.47, 6.12, 7.48, 6.94,…
## $ BiologicalMaterial02 <dbl> 49.58, 60.97, 60.97, 60.97, 63.33, 58.36, 64.47…
## $ BiologicalMaterial03 <dbl> 56.97, 67.48, 67.48, 67.48, 72.25, 65.31, 72.41…
## $ BiologicalMaterial04 <dbl> 12.74, 14.65, 14.65, 14.65, 14.02, 15.17, 13.82…
## $ BiologicalMaterial05 <dbl> 19.51, 19.36, 19.36, 19.36, 17.91, 21.79, 17.71…
## $ BiologicalMaterial06 <dbl> 43.73, 53.14, 53.14, 53.14, 54.66, 51.23, 54.45…
## $ BiologicalMaterial07 <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 10…
## $ BiologicalMaterial08 <dbl> 16.66, 19.04, 19.04, 19.04, 18.22, 18.30, 18.72…
## $ BiologicalMaterial09 <dbl> 11.44, 12.55, 12.55, 12.55, 12.80, 12.13, 12.95…
## $ BiologicalMaterial10 <dbl> 3.46, 3.46, 3.46, 3.46, 3.05, 3.78, 3.04, 3.85,…
## $ BiologicalMaterial11 <dbl> 138.09, 153.67, 153.67, 153.67, 147.61, 151.88,…
## $ BiologicalMaterial12 <dbl> 18.83, 21.05, 21.05, 21.05, 21.05, 20.76, 20.75…
## $ ManufacturingProcess01 <dbl> NA, 0.0, 0.0, 0.0, 10.7, 12.0, 11.5, 12.0, 12.0…
## $ ManufacturingProcess02 <dbl> NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ManufacturingProcess03 <dbl> NA, NA, NA, NA, NA, NA, 1.56, 1.55, 1.56, 1.55,…
## $ ManufacturingProcess04 <dbl> NA, 917, 912, 911, 918, 924, 933, 929, 928, 938…
## $ ManufacturingProcess05 <dbl> NA, 1032.2, 1003.6, 1014.6, 1027.5, 1016.8, 988…
## $ ManufacturingProcess06 <dbl> NA, 210.0, 207.1, 213.3, 205.7, 208.9, 210.0, 2…
## $ ManufacturingProcess07 <dbl> NA, 177, 178, 177, 178, 178, 177, 178, 177, 177…
## $ ManufacturingProcess08 <dbl> NA, 178, 178, 177, 178, 178, 178, 178, 177, 177…
## $ ManufacturingProcess09 <dbl> 43.00, 46.57, 45.07, 44.92, 44.96, 45.32, 49.36…
## $ ManufacturingProcess10 <dbl> NA, NA, NA, NA, NA, NA, 11.6, 10.2, 9.7, 10.1, …
## $ ManufacturingProcess11 <dbl> NA, NA, NA, NA, NA, NA, 11.5, 11.3, 11.1, 10.2,…
## $ ManufacturingProcess12 <dbl> NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ ManufacturingProcess13 <dbl> 35.5, 34.0, 34.8, 34.8, 34.6, 34.0, 32.4, 33.6,…
## $ ManufacturingProcess14 <dbl> 4898, 4869, 4878, 4897, 4992, 4985, 4745, 4854,…
## $ ManufacturingProcess15 <dbl> 6108, 6095, 6087, 6102, 6233, 6222, 5999, 6105,…
## $ ManufacturingProcess16 <dbl> 4682, 4617, 4617, 4635, 4733, 4786, 4486, 4626,…
## $ ManufacturingProcess17 <dbl> 35.5, 34.0, 34.8, 34.8, 33.9, 33.4, 33.8, 33.6,…
## $ ManufacturingProcess18 <dbl> 4865, 4867, 4877, 4872, 4886, 4862, 4758, 4766,…
## $ ManufacturingProcess19 <dbl> 6049, 6097, 6078, 6073, 6102, 6115, 6013, 6022,…
## $ ManufacturingProcess20 <dbl> 4665, 4621, 4621, 4611, 4659, 4696, 4522, 4552,…
## $ ManufacturingProcess21 <dbl> 0.0, 0.0, 0.0, 0.0, -0.7, -0.6, 1.4, 0.0, 0.0, …
## $ ManufacturingProcess22 <dbl> NA, 3, 4, 5, 8, 9, 1, 2, 3, 4, 6, 7, 8, 10, 11,…
## $ ManufacturingProcess23 <dbl> NA, 0, 1, 2, 4, 1, 1, 2, 3, 1, 3, 4, 1, 2, 3, 4…
## $ ManufacturingProcess24 <dbl> NA, 3, 4, 5, 18, 1, 1, 2, 3, 4, 6, 7, 8, 2, 15,…
## $ ManufacturingProcess25 <dbl> 4873, 4869, 4897, 4892, 4930, 4871, 4795, 4806,…
## $ ManufacturingProcess26 <dbl> 6074, 6107, 6116, 6111, 6151, 6128, 6057, 6059,…
## $ ManufacturingProcess27 <dbl> 4685, 4630, 4637, 4630, 4684, 4687, 4572, 4586,…
## $ ManufacturingProcess28 <dbl> 10.7, 11.2, 11.1, 11.1, 11.3, 11.4, 11.2, 11.1,…
## $ ManufacturingProcess29 <dbl> 21.0, 21.4, 21.3, 21.3, 21.6, 21.7, 21.2, 21.2,…
## $ ManufacturingProcess30 <dbl> 9.9, 9.9, 9.4, 9.4, 9.0, 10.1, 11.2, 10.9, 10.5…
## $ ManufacturingProcess31 <dbl> 69.1, 68.7, 69.3, 69.3, 69.4, 68.2, 67.6, 67.9,…
## $ ManufacturingProcess32 <dbl> 156, 169, 173, 171, 171, 173, 159, 161, 160, 16…
## $ ManufacturingProcess33 <dbl> 66, 66, 66, 68, 70, 70, 65, 65, 65, 66, 67, 67,…
## $ ManufacturingProcess34 <dbl> 2.4, 2.6, 2.6, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.…
## $ ManufacturingProcess35 <dbl> 486, 508, 509, 496, 468, 490, 475, 478, 491, 48…
## $ ManufacturingProcess36 <dbl> 0.019, 0.019, 0.018, 0.018, 0.017, 0.018, 0.019…
## $ ManufacturingProcess37 <dbl> 0.5, 2.0, 0.7, 1.2, 0.2, 0.4, 0.8, 1.0, 1.2, 1.…
## $ ManufacturingProcess38 <dbl> 3, 2, 2, 2, 2, 2, 2, 2, 3, 3, 2, 3, 3, 3, 3, 3,…
## $ ManufacturingProcess39 <dbl> 7.2, 7.2, 7.2, 7.2, 7.3, 7.2, 7.3, 7.3, 7.4, 7.…
## $ ManufacturingProcess40 <dbl> NA, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0…
## $ ManufacturingProcess41 <dbl> NA, 0.15, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0…
## $ ManufacturingProcess42 <dbl> 11.6, 11.1, 12.0, 10.6, 11.0, 11.5, 11.7, 11.4,…
## $ ManufacturingProcess43 <dbl> 3.0, 0.9, 1.0, 1.1, 1.1, 2.2, 0.7, 0.8, 0.9, 0.…
## $ ManufacturingProcess44 <dbl> 1.8, 1.9, 1.8, 1.8, 1.7, 1.8, 2.0, 2.0, 1.9, 1.…
## $ ManufacturingProcess45 <dbl> 2.4, 2.2, 2.3, 2.1, 2.1, 2.0, 2.2, 2.2, 2.1, 2.…
# Median-impute missing values one column at a time. Each column's median is
# taken over all rows of ChemicalManufacturingProcess, ignoring NAs.
# NOTE(review): the medians are computed before the train/test split made
# later in the script, so held-out rows contribute to the imputed values.
columns <- colnames(ChemicalManufacturingProcess)
for (col in columns) {
  print(col) # echo the column currently being processed
  median_value <- median(ChemicalManufacturingProcess[[col]], na.rm = TRUE)
  ChemicalManufacturingProcess[[col]] <- replace(
    ChemicalManufacturingProcess[[col]],
    is.na(ChemicalManufacturingProcess[[col]]),
    median_value
  )
}
## [1] "Yield"
## [1] "BiologicalMaterial01"
## [1] "BiologicalMaterial02"
## [1] "BiologicalMaterial03"
## [1] "BiologicalMaterial04"
## [1] "BiologicalMaterial05"
## [1] "BiologicalMaterial06"
## [1] "BiologicalMaterial07"
## [1] "BiologicalMaterial08"
## [1] "BiologicalMaterial09"
## [1] "BiologicalMaterial10"
## [1] "BiologicalMaterial11"
## [1] "BiologicalMaterial12"
## [1] "ManufacturingProcess01"
## [1] "ManufacturingProcess02"
## [1] "ManufacturingProcess03"
## [1] "ManufacturingProcess04"
## [1] "ManufacturingProcess05"
## [1] "ManufacturingProcess06"
## [1] "ManufacturingProcess07"
## [1] "ManufacturingProcess08"
## [1] "ManufacturingProcess09"
## [1] "ManufacturingProcess10"
## [1] "ManufacturingProcess11"
## [1] "ManufacturingProcess12"
## [1] "ManufacturingProcess13"
## [1] "ManufacturingProcess14"
## [1] "ManufacturingProcess15"
## [1] "ManufacturingProcess16"
## [1] "ManufacturingProcess17"
## [1] "ManufacturingProcess18"
## [1] "ManufacturingProcess19"
## [1] "ManufacturingProcess20"
## [1] "ManufacturingProcess21"
## [1] "ManufacturingProcess22"
## [1] "ManufacturingProcess23"
## [1] "ManufacturingProcess24"
## [1] "ManufacturingProcess25"
## [1] "ManufacturingProcess26"
## [1] "ManufacturingProcess27"
## [1] "ManufacturingProcess28"
## [1] "ManufacturingProcess29"
## [1] "ManufacturingProcess30"
## [1] "ManufacturingProcess31"
## [1] "ManufacturingProcess32"
## [1] "ManufacturingProcess33"
## [1] "ManufacturingProcess34"
## [1] "ManufacturingProcess35"
## [1] "ManufacturingProcess36"
## [1] "ManufacturingProcess37"
## [1] "ManufacturingProcess38"
## [1] "ManufacturingProcess39"
## [1] "ManufacturingProcess40"
## [1] "ManufacturingProcess41"
## [1] "ManufacturingProcess42"
## [1] "ManufacturingProcess43"
## [1] "ManufacturingProcess44"
## [1] "ManufacturingProcess45"
# Re-run the per-column summary after imputation to check that no NA's rows
# remain.
summary(ChemicalManufacturingProcess)
## Yield BiologicalMaterial01 BiologicalMaterial02 BiologicalMaterial03
## Min. :35.25 Min. :4.580 Min. :46.87 Min. :56.97
## 1st Qu.:38.75 1st Qu.:5.978 1st Qu.:52.68 1st Qu.:64.98
## Median :39.97 Median :6.305 Median :55.09 Median :67.22
## Mean :40.18 Mean :6.411 Mean :55.69 Mean :67.70
## 3rd Qu.:41.48 3rd Qu.:6.870 3rd Qu.:58.74 3rd Qu.:70.43
## Max. :46.34 Max. :8.810 Max. :64.75 Max. :78.25
## BiologicalMaterial04 BiologicalMaterial05 BiologicalMaterial06
## Min. : 9.38 Min. :13.24 Min. :40.60
## 1st Qu.:11.24 1st Qu.:17.23 1st Qu.:46.05
## Median :12.10 Median :18.49 Median :48.46
## Mean :12.35 Mean :18.60 Mean :48.91
## 3rd Qu.:13.22 3rd Qu.:19.90 3rd Qu.:51.34
## Max. :23.09 Max. :24.85 Max. :59.38
## BiologicalMaterial07 BiologicalMaterial08 BiologicalMaterial09
## Min. :100.0 Min. :15.88 Min. :11.44
## 1st Qu.:100.0 1st Qu.:17.06 1st Qu.:12.60
## Median :100.0 Median :17.51 Median :12.84
## Mean :100.0 Mean :17.49 Mean :12.85
## 3rd Qu.:100.0 3rd Qu.:17.88 3rd Qu.:13.13
## Max. :100.8 Max. :19.14 Max. :14.08
## BiologicalMaterial10 BiologicalMaterial11 BiologicalMaterial12
## Min. :1.770 Min. :135.8 Min. :18.35
## 1st Qu.:2.460 1st Qu.:143.8 1st Qu.:19.73
## Median :2.710 Median :146.1 Median :20.12
## Mean :2.801 Mean :147.0 Mean :20.20
## 3rd Qu.:2.990 3rd Qu.:149.6 3rd Qu.:20.75
## Max. :6.870 Max. :158.7 Max. :22.21
## ManufacturingProcess01 ManufacturingProcess02 ManufacturingProcess03
## Min. : 0.00 Min. : 0.00 Min. :1.47
## 1st Qu.:10.80 1st Qu.:19.30 1st Qu.:1.53
## Median :11.40 Median :21.00 Median :1.54
## Mean :11.21 Mean :16.76 Mean :1.54
## 3rd Qu.:12.12 3rd Qu.:21.50 3rd Qu.:1.55
## Max. :14.10 Max. :22.50 Max. :1.60
## ManufacturingProcess04 ManufacturingProcess05 ManufacturingProcess06
## Min. :911.0 Min. : 923.0 Min. :203.0
## 1st Qu.:928.0 1st Qu.: 986.8 1st Qu.:205.7
## Median :934.0 Median : 999.2 Median :206.8
## Mean :931.9 Mean :1001.7 Mean :207.4
## 3rd Qu.:936.0 3rd Qu.:1008.7 3rd Qu.:208.7
## Max. :946.0 Max. :1175.3 Max. :227.4
## ManufacturingProcess07 ManufacturingProcess08 ManufacturingProcess09
## Min. :177.0 Min. :177.0 Min. :38.89
## 1st Qu.:177.0 1st Qu.:177.0 1st Qu.:44.89
## Median :177.0 Median :178.0 Median :45.73
## Mean :177.5 Mean :177.6 Mean :45.66
## 3rd Qu.:178.0 3rd Qu.:178.0 3rd Qu.:46.52
## Max. :178.0 Max. :178.0 Max. :49.36
## ManufacturingProcess10 ManufacturingProcess11 ManufacturingProcess12
## Min. : 7.500 Min. : 7.500 Min. : 0.0
## 1st Qu.: 8.700 1st Qu.: 9.000 1st Qu.: 0.0
## Median : 9.100 Median : 9.400 Median : 0.0
## Mean : 9.175 Mean : 9.386 Mean : 852.9
## 3rd Qu.: 9.500 3rd Qu.: 9.825 3rd Qu.: 0.0
## Max. :11.600 Max. :11.500 Max. :4549.0
## ManufacturingProcess13 ManufacturingProcess14 ManufacturingProcess15
## Min. :32.10 Min. :4701 Min. :5904
## 1st Qu.:33.90 1st Qu.:4828 1st Qu.:6010
## Median :34.60 Median :4856 Median :6032
## Mean :34.51 Mean :4854 Mean :6039
## 3rd Qu.:35.20 3rd Qu.:4882 3rd Qu.:6061
## Max. :38.60 Max. :5055 Max. :6233
## ManufacturingProcess16 ManufacturingProcess17 ManufacturingProcess18
## Min. : 0 Min. :31.30 Min. : 0
## 1st Qu.:4561 1st Qu.:33.50 1st Qu.:4813
## Median :4588 Median :34.40 Median :4835
## Mean :4566 Mean :34.34 Mean :4810
## 3rd Qu.:4619 3rd Qu.:35.10 3rd Qu.:4862
## Max. :4852 Max. :40.00 Max. :4971
## ManufacturingProcess19 ManufacturingProcess20 ManufacturingProcess21
## Min. :5890 Min. : 0 Min. :-1.8000
## 1st Qu.:6001 1st Qu.:4553 1st Qu.:-0.6000
## Median :6022 Median :4582 Median :-0.3000
## Mean :6028 Mean :4556 Mean :-0.1642
## 3rd Qu.:6050 3rd Qu.:4610 3rd Qu.: 0.0000
## Max. :6146 Max. :4759 Max. : 3.6000
## ManufacturingProcess22 ManufacturingProcess23 ManufacturingProcess24
## Min. : 0.000 Min. :0.000 Min. : 0.00
## 1st Qu.: 3.000 1st Qu.:2.000 1st Qu.: 4.00
## Median : 5.000 Median :3.000 Median : 8.00
## Mean : 5.403 Mean :3.017 Mean : 8.83
## 3rd Qu.: 8.000 3rd Qu.:4.000 3rd Qu.:14.00
## Max. :12.000 Max. :6.000 Max. :23.00
## ManufacturingProcess25 ManufacturingProcess26 ManufacturingProcess27
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.:4834 1st Qu.:6021 1st Qu.:4563
## Median :4855 Median :6047 Median :4587
## Mean :4829 Mean :6016 Mean :4563
## 3rd Qu.:4876 3rd Qu.:6069 3rd Qu.:4609
## Max. :4990 Max. :6161 Max. :4710
## ManufacturingProcess28 ManufacturingProcess29 ManufacturingProcess30
## Min. : 0.0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.0 1st Qu.:19.70 1st Qu.: 8.80
## Median :10.4 Median :19.90 Median : 9.10
## Mean : 6.7 Mean :20.01 Mean : 9.16
## 3rd Qu.:10.7 3rd Qu.:20.40 3rd Qu.: 9.70
## Max. :11.5 Max. :22.00 Max. :11.20
## ManufacturingProcess31 ManufacturingProcess32 ManufacturingProcess33
## Min. : 0.0 Min. :143.0 Min. :56.00
## 1st Qu.:70.1 1st Qu.:155.0 1st Qu.:62.00
## Median :70.8 Median :158.0 Median :64.00
## Mean :70.2 Mean :158.5 Mean :63.56
## 3rd Qu.:71.4 3rd Qu.:162.0 3rd Qu.:65.00
## Max. :72.5 Max. :173.0 Max. :70.00
## ManufacturingProcess34 ManufacturingProcess35 ManufacturingProcess36
## Min. :2.300 Min. :463.0 Min. :0.01700
## 1st Qu.:2.500 1st Qu.:490.0 1st Qu.:0.01900
## Median :2.500 Median :495.0 Median :0.02000
## Mean :2.494 Mean :495.6 Mean :0.01959
## 3rd Qu.:2.500 3rd Qu.:501.0 3rd Qu.:0.02000
## Max. :2.600 Max. :522.0 Max. :0.02200
## ManufacturingProcess37 ManufacturingProcess38 ManufacturingProcess39
## Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.700 1st Qu.:2.000 1st Qu.:7.100
## Median :1.000 Median :3.000 Median :7.200
## Mean :1.014 Mean :2.534 Mean :6.851
## 3rd Qu.:1.300 3rd Qu.:3.000 3rd Qu.:7.300
## Max. :2.300 Max. :3.000 Max. :7.500
## ManufacturingProcess40 ManufacturingProcess41 ManufacturingProcess42
## Min. :0.00000 Min. :0.00000 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:11.40
## Median :0.00000 Median :0.00000 Median :11.60
## Mean :0.01761 Mean :0.02358 Mean :11.21
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:11.70
## Max. :0.10000 Max. :0.20000 Max. :12.10
## ManufacturingProcess43 ManufacturingProcess44 ManufacturingProcess45
## Min. : 0.0000 Min. :0.000 Min. :0.000
## 1st Qu.: 0.6000 1st Qu.:1.800 1st Qu.:2.100
## Median : 0.8000 Median :1.900 Median :2.200
## Mean : 0.9119 Mean :1.805 Mean :2.138
## 3rd Qu.: 1.0250 3rd Qu.:1.900 3rd Qu.:2.300
## Max. :11.0000 Max. :2.100 Max. :2.600
# Reproducible 75/25 split: draw 75% of the row indices without replacement
# for training and keep the remaining rows for testing.
set.seed(1234)
sample_set <- sample(
  nrow(ChemicalManufacturingProcess),
  round(nrow(ChemicalManufacturingProcess) * 0.75),
  replace = FALSE
)
train_set <- ChemicalManufacturingProcess[sample_set, ]
test_set <- ChemicalManufacturingProcess[-sample_set, ]
# Ordinary least squares fit of Yield on every other column of the training
# rows, followed by the coefficient table and fit statistics.
lm_mod <- lm(
  formula = Yield ~ .,
  data = train_set
)
summary(lm_mod)
##
## Call:
## lm(formula = Yield ~ ., data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.29591 -0.47313 -0.03832 0.52560 1.95060
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.045e+02 1.649e+02 0.634 0.52805
## BiologicalMaterial01 -3.738e-01 5.217e-01 -0.716 0.47596
## BiologicalMaterial02 -1.886e-01 1.564e-01 -1.206 0.23146
## BiologicalMaterial03 5.307e-01 2.977e-01 1.783 0.07869 .
## BiologicalMaterial04 -6.683e-01 7.489e-01 -0.892 0.37499
## BiologicalMaterial05 1.619e-01 1.450e-01 1.117 0.26774
## BiologicalMaterial06 -1.864e-01 3.687e-01 -0.506 0.61462
## BiologicalMaterial07 -1.466e+00 1.224e+00 -1.198 0.23483
## BiologicalMaterial08 1.170e+00 8.969e-01 1.305 0.19594
## BiologicalMaterial09 -2.906e+00 1.829e+00 -1.589 0.11628
## BiologicalMaterial10 1.397e+00 1.929e+00 0.724 0.47112
## BiologicalMaterial11 1.433e-02 1.221e-01 0.117 0.90689
## BiologicalMaterial12 3.197e-01 8.077e-01 0.396 0.69333
## ManufacturingProcess01 6.107e-02 1.232e-01 0.496 0.62156
## ManufacturingProcess02 5.395e-02 6.278e-02 0.859 0.39285
## ManufacturingProcess03 -7.360e+00 6.296e+00 -1.169 0.24609
## ManufacturingProcess04 6.652e-02 3.927e-02 1.694 0.09440 .
## ManufacturingProcess05 -1.504e-03 4.747e-03 -0.317 0.75231
## ManufacturingProcess06 1.229e-01 7.911e-02 1.554 0.12439
## ManufacturingProcess07 -1.529e-02 2.652e-01 -0.058 0.95419
## ManufacturingProcess08 -2.349e-01 3.126e-01 -0.751 0.45483
## ManufacturingProcess09 2.552e-01 2.333e-01 1.094 0.27759
## ManufacturingProcess10 -4.665e-01 6.717e-01 -0.694 0.48952
## ManufacturingProcess11 4.531e-01 8.798e-01 0.515 0.60805
## ManufacturingProcess12 5.780e-06 1.450e-04 0.040 0.96831
## ManufacturingProcess13 -5.438e-01 5.968e-01 -0.911 0.36514
## ManufacturingProcess14 1.485e-02 1.368e-02 1.085 0.28119
## ManufacturingProcess15 -1.329e-02 1.216e-02 -1.092 0.27817
## ManufacturingProcess16 4.847e-04 5.263e-04 0.921 0.36001
## ManufacturingProcess17 4.245e-01 5.078e-01 0.836 0.40583
## ManufacturingProcess18 5.217e-03 5.462e-03 0.955 0.34255
## ManufacturingProcess19 3.528e-03 1.078e-02 0.327 0.74448
## ManufacturingProcess20 -5.254e-03 5.742e-03 -0.915 0.36306
## ManufacturingProcess21 NA NA NA NA
## ManufacturingProcess22 -4.289e-03 5.601e-02 -0.077 0.93917
## ManufacturingProcess23 -1.722e-02 1.108e-01 -0.155 0.87692
## ManufacturingProcess24 1.018e-03 3.179e-02 0.032 0.97455
## ManufacturingProcess25 -5.984e-03 1.861e-02 -0.322 0.74869
## ManufacturingProcess26 7.468e-03 1.436e-02 0.520 0.60463
## ManufacturingProcess27 -1.207e-02 1.055e-02 -1.144 0.25629
## ManufacturingProcess28 -5.524e-02 4.448e-02 -1.242 0.21812
## ManufacturingProcess29 1.479e+00 1.344e+00 1.100 0.27465
## ManufacturingProcess30 6.995e-01 9.285e-01 0.753 0.45359
## ManufacturingProcess31 6.816e-02 1.421e-01 0.480 0.63295
## ManufacturingProcess32 2.514e-01 7.756e-02 3.241 0.00178 **
## ManufacturingProcess33 -3.230e-01 1.627e-01 -1.985 0.05082 .
## ManufacturingProcess34 -4.644e-02 3.453e+00 -0.013 0.98931
## ManufacturingProcess35 1.044e-02 2.202e-02 0.474 0.63689
## ManufacturingProcess36 -1.205e+02 4.011e+02 -0.300 0.76466
## ManufacturingProcess37 -7.608e-01 3.803e-01 -2.001 0.04904 *
## ManufacturingProcess38 -3.977e-01 3.150e-01 -1.262 0.21068
## ManufacturingProcess39 1.779e-01 1.689e-01 1.054 0.29540
## ManufacturingProcess40 1.039e+00 9.859e+00 0.105 0.91639
## ManufacturingProcess41 1.908e+00 6.904e+00 0.276 0.78308
## ManufacturingProcess42 -1.701e-01 3.265e-01 -0.521 0.60392
## ManufacturingProcess43 2.823e-01 4.537e-01 0.622 0.53561
## ManufacturingProcess44 -7.369e-01 1.698e+00 -0.434 0.66544
## ManufacturingProcess45 1.167e+00 7.182e-01 1.625 0.10841
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.073 on 75 degrees of freedom
## Multiple R-squared: 0.8105, Adjusted R-squared: 0.669
## F-statistic: 5.727 on 56 and 75 DF, p-value: 3.719e-12
# Score the held-out rows with the linear model (Yield dropped from the
# inputs), then compare observed and predicted Yield via RMSE, R-squared and
# MAE.
lm_predict <- predict(
  lm_mod,
  newdata = dplyr::select(test_set, -Yield)
)
lmValues <- data.frame(
  obs = test_set$Yield,
  pred = lm_predict
)
defaultSummary(lmValues)
## RMSE Rsquared MAE
## 1.5481630 0.4581323 1.1693219