Exercise 6.2

Part A

The matrix fingerprints contains the 1,107 binary molecular predictors for the 165 compounds, while permeability contains the permeability response.

# Attach the package that ships the permeability data. Loading the
# `permeability` data set makes both `fingerprints` (the predictor matrix)
# and `permeability` (the response) available in the workspace.
library(AppliedPredictiveModeling)
data("permeability")

# Preview the first six responses; `fingerprints` is not printed here.
head(permeability, n = 6L)
##   permeability
## 1       12.520
## 2        1.120
## 3       19.405
## 4        1.730
## 5        1.680
## 6        0.510

Part B

The fingerprint predictors indicate the presence or absence of substructures of a molecule and are often sparse meaning that relatively few of the molecules contain each substructure. Filter out the predictors that have low frequencies using the nearZeroVar function from the caret package. How many predictors are left for modeling?

library(caret)

# Column indices of the fingerprint predictors that nearZeroVar() flags as
# (near-)constant, i.e. substructures present in almost none/all compounds.
nzv <- nearZeroVar(fingerprints)

# Drop the flagged columns. The empty case is guarded explicitly: when no
# column is flagged, `fingerprints[, -integer(0)]` would select *zero*
# columns instead of keeping all of them. `drop = FALSE` keeps the result a
# matrix even if only one predictor survives.
filtered_fp <- if (length(nzv) > 0) {
  fingerprints[, -nzv, drop = FALSE]
} else {
  fingerprints
}

# number of predictors left
num_predictors_left <- ncol(filtered_fp)
num_predictors_left
## [1] 388

There are 388 predictors left for modeling.

Part C

Split the data into a training and a test set, pre-process the data, and tune a PLS model. How many latent variables are optimal and what is the corresponding resampled estimate of \(R^2\)?

# Seed the RNG so the partition and the CV folds drawn below are repeatable
set.seed(1999)

# Resampling scheme used for tuning: 10-fold cross-validation
ctrl <- trainControl(method = "cv", number = 10)

# Row indices of the 80% training partition
in_train <- createDataPartition(permeability, p = 0.8, list = FALSE)

# Predictors: training rows and held-out rows
train_finger <- filtered_fp[in_train, ]
test_finger <- filtered_fp[-in_train, ]

# Response: training rows and held-out rows
train_perm <- permeability[in_train, ]
test_perm <- permeability[-in_train, ]

# Tune PLS over up to 20 component counts on centered and scaled predictors,
# choosing the number of components by resampled R^2
pls_model <- train(
  x = train_finger,
  y = train_perm,
  method = "pls",
  preProcess = c("center", "scale"),
  tuneLength = 20,
  metric = "Rsquared",
  trControl = ctrl
)

# Selected number of latent variables
pls_model$bestTune
##   ncomp
## 7     7
# Largest resampled R^2 among the candidate ncomp values in the tuning results
max(pls_model$results$Rsquared)
## [1] 0.5989923

The optimal number of latent variables is 7, and the corresponding resampled estimate is \(R^2 = 0.5989923\).

Part D

Predict the response for the test set. What is the test set estimate of \(R^2\)?

# Score the held-out compounds with the tuned PLS model
test_pred <- predict(pls_model, newdata = test_finger)

# Test-set RMSE, R^2 and MAE of those predictions against the observed values
postResample(test_pred, test_perm)
##       RMSE   Rsquared        MAE 
## 13.0656120  0.3460259  9.4570753

The test set estimate of \(R^2 = 0.3460259\).

Part E

Try building other models discussed in this chapter. Do any have better predictive performance?

# NOTE(review): no seed is reset and `ctrl` carries no fixed fold indices, so
# each train() call below appears to draw its own CV folds, different from the
# ones used for pls_model. resamples() still lines the results up fold by
# fold -- confirm whether a paired comparison on identical folds is intended.

# Ridge Regression: 15 penalty values evenly spaced between 0 and 0.1
ridgeGrid <- data.frame(.lambda = seq(0, .1, length.out = 15))

ridge_model <- train(
  train_finger,
  train_perm,
  method = "ridge",
  ## Fit the model over many penalty values
  tuneGrid = ridgeGrid,
  trControl = ctrl,
  ## put the predictors on the same scale
  preProcess = c("center", "scale")
)

# Elastic Net (caret's default tuning grid, since none is supplied)
enet_model <- train(
  train_finger,
  train_perm,
  method = "enet",
  trControl = ctrl,
  preProcess = c("center", "scale")
)

# Lasso Model (caret's default tuning grid, since none is supplied)
lasso_model <- train(
  train_finger,
  train_perm,
  method = "lasso",
  trControl = ctrl,
  preProcess = c("center", "scale")
)

# Compare resampled performance of the four tuned models
resamps <- resamples(list(
  PLS = pls_model,
  Ridge = ridge_model,
  Lasso = lasso_model,
  ElasticNet = enet_model
))

summary(resamps)
## 
## Call:
## summary.resamples(object = resamps)
## 
## Models: PLS, Ridge, Lasso, ElasticNet 
## Number of resamples: 10 
## 
## MAE 
##                Min.  1st Qu.    Median         Mean      3rd Qu.         Max.
## PLS        4.924528 7.205850  7.759610 8.222309e+00 8.810132e+00 1.190481e+01
## Ridge      6.229809 7.351045  8.795244 8.710503e+00 9.527683e+00 1.146386e+01
## Lasso      6.011751 8.366535 11.575358 5.593351e+16 9.900714e+15 5.242374e+17
## ElasticNet 6.005219 6.828456  7.943841 8.132488e+00 8.681726e+00 1.288952e+01
##            NA's
## PLS           0
## Ridge         0
## Lasso         0
## ElasticNet    0
## 
## RMSE 
##                Min.   1st Qu.   Median         Mean      3rd Qu.         Max.
## PLS        6.076067  9.118263 10.46751 1.067299e+01 1.226606e+01 1.469694e+01
## Ridge      8.486751  9.945819 11.23078 1.143889e+01 1.317923e+01 1.473366e+01
## Lasso      7.979203 11.274779 15.97868 1.458469e+17 2.597406e+16 1.344887e+18
## ElasticNet 8.044979  8.591701 10.66512 1.065031e+01 1.173408e+01 1.614912e+01
##            NA's
## PLS           0
## Ridge         0
## Lasso         0
## ElasticNet    0
## 
## Rsquared 
##                   Min.    1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## PLS        0.269152004 0.48317227 0.6298900 0.5989923 0.7301767 0.8070123    0
## Ridge      0.153681613 0.34347812 0.5445163 0.5041510 0.6632503 0.7912857    0
## Lasso      0.007819875 0.05271164 0.2948675 0.3199956 0.5335093 0.8830764    0
## ElasticNet 0.128902356 0.42368227 0.6536112 0.5543604 0.7330841 0.7680089    0
# Box-and-whisker plot of the resampled R^2 values, one box per model
bwplot(resamps, metric = "Rsquared")

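PLS has the highest mean resampled \(R^2\) (0.599), and its mean RMSE (10.67) is essentially tied with that of Elastic Net (10.65), which is marginally lower. Elastic Net comes in a close second on \(R^2\) (0.554), which suggests that there is some benefit from combining L1 and L2 penalties, but Ridge and Lasso individually perform worse; the Lasso's enormous mean RMSE (about 1.5e17) indicates that its fit was numerically unstable in some folds.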

# Test-set comparison of all four tuned models
models <- list(
  PLS = pls_model,
  Ridge = ridge_model,
  Lasso = lasso_model,
  ElasticNet = enet_model
)

# One vector of held-out predictions per model
pred_list <- lapply(models, function(fit) predict(fit, newdata = test_finger))

# RMSE / R^2 / MAE of each prediction vector against the observed response
metric_list <- lapply(pred_list, postResample, obs = test_perm)

# Stack the per-model metrics (one row per model) and expose the model name
# as an ordinary column instead of as row names
metric_mat <- do.call(rbind, metric_list)
test_results <- data.frame(
  Model = rownames(metric_mat),
  metric_mat,
  row.names = NULL
)

# Show the models from best to worst test-set R^2
by_rsq <- order(test_results$Rsquared, decreasing = TRUE)
test_results[by_rsq, ]
##        Model         RMSE   Rsquared          MAE
## 1        PLS 1.306561e+01 0.34602594 9.457075e+00
## 4 ElasticNet 1.383054e+01 0.28758334 9.296786e+00
## 2      Ridge 1.404174e+01 0.27068608 9.393488e+00
## 3      Lasso 7.956568e+06 0.02834479 2.425811e+06

PLS achieved the best test-set performance of all the models, with the highest \(R^2\) (0.346) and the lowest RMSE (13.07). Elastic Net and Ridge Regression come in close behind PLS, while Lasso performed very poorly. In conclusion, these results show that PLS performs the best on unseen data.

Part F

Would you recommend any of your models to replace the permeability laboratory experiment?

Of the models tested, the PLS model performs the best, so it is the one I would recommend continuing to use.

Exercise 6.3

A chemical manufacturing process for a pharmaceutical product was discussed in Sect. 1.4. In this problem, the objective is to understand the relationship between biological measurements of the raw materials (predictors), measurements of the manufacturing process (predictors), and the response of product yield. Biological predictors cannot be changed but can be used to assess the quality of the raw material before processing. On the other hand, manufacturing process predictors can be changed in the manufacturing process. Improving product yield by 1% will boost revenue by approximately one hundred thousand dollars per batch:

Part A

The matrix processPredictors contains the 57 predictors (12 describing the input biological material and 45 describing the process predictors) for the 176 manufacturing runs. yield contains the percent yield for each run.

data(ChemicalManufacturingProcess)

# Separate the response from the predictors. Yield is dropped by name rather
# than by position, so the split does not depend on the column order.
yield <- ChemicalManufacturingProcess$Yield
predictor_names <- setdiff(names(ChemicalManufacturingProcess), "Yield")
predictors <- ChemicalManufacturingProcess[, predictor_names, drop = FALSE]

# Number of predictor columns
ncol(predictors)
## [1] 57

Part B

A small percentage of cells in the predictor set contain missing values. Use an imputation function to fill in these missing values

# Check NAs
sum(is.na(ChemicalManufacturingProcess))
## [1] 106
# Impute using KNN -- on the predictors only. caret's "knnImpute" also
# centers and scales every column it is given, so fitting it on the full data
# frame would standardize Yield and let the response take part in the
# neighbour search. Keeping Yield out leaves it in its original units.
predictor_cols <- setdiff(names(ChemicalManufacturingProcess), "Yield")

preProc <- preProcess(
  ChemicalManufacturingProcess[, predictor_cols, drop = FALSE],
  method = "knnImpute"
)

# Same columns as the raw data: Yield untouched, predictors imputed
# (and standardized as a side effect of knnImpute).
# NOTE(review): downstream printed results were produced with Yield
# standardized; re-knit to refresh them.
chemical_imp <- ChemicalManufacturingProcess
chemical_imp[, predictor_cols] <- predict(
  preProc,
  ChemicalManufacturingProcess[, predictor_cols, drop = FALSE]
)

# check for missing values after imputation
sum(is.na(chemical_imp))
## [1] 0

Part C

Split the data into a training and a test set, pre-process the data, and tune a model of your choice from this chapter. What is the optimal value of the performance metric?

library(dplyr)

# Seed the RNG: createDataPartition() and the CV folds drawn inside train()
# are random, so without a seed the tuning results differ on every run.
# NOTE(review): the printed output below this chunk came from an unseeded run
# on the unfiltered predictors; re-knit to refresh it.
set.seed(1999)

predictors_imp <- chemical_imp %>% select(-Yield)

# split data into X & y
X <- predictors_imp
y <- chemical_imp$Yield

# find near-zero variance predictors
nzv_pred <- nearZeroVar(X)

# remove the near-zero variance predictors; when none are flagged keep X
# as is, because `X[, -integer(0)]` would select zero columns
filtered_pred <- if (length(nzv_pred) > 0) {
  X[, -nzv_pred, drop = FALSE]
} else {
  X
}

# Indexing for train/test split
split <- createDataPartition(y, p = 0.8, list = FALSE)

# train and test sets, built from the *filtered* predictors so the
# near-zero-variance filter above actually takes effect
train_x <- filtered_pred[ split, , drop = FALSE]
test_x  <- filtered_pred[-split, , drop = FALSE]
train_y <- y[ split]
test_y  <- y[-split]

# Fit and train Elastic Net model: 10-fold CV, tuned on resampled R^2
ctrl <- trainControl(method = "cv", number = 10)
enet_model_chem <- train(
  x = train_x, y = train_y,
  method = "enet",
  metric = "Rsquared",
  tuneLength = 20,
  trControl = ctrl,
  preProcess = c("center", "scale")
)

# Print the resampling profile and the selected tuning parameters
enet_model_chem
## Elasticnet 
## 
## 144 samples
##  57 predictor
## 
## Pre-processing: centered (57), scaled (57) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 130, 128, 131, 129, 130, 130, ... 
## Resampling results across tuning parameters:
## 
##   lambda        fraction  RMSE       Rsquared   MAE      
##   0.0000000000  0.05      0.6788593  0.5992809  0.5666962
##   0.0000000000  0.10      0.6258766  0.6088763  0.5117745
##   0.0000000000  0.15      0.7015448  0.5984460  0.5206475
##   0.0000000000  0.20      0.8982082  0.5955380  0.5738562
##   0.0000000000  0.25      1.1516827  0.5897668  0.6402826
##   0.0000000000  0.30      1.3120696  0.5847219  0.6832045
##   0.0000000000  0.35      1.3838960  0.5560296  0.7141306
##   0.0000000000  0.40      1.6307992  0.5216551  0.7907288
##   0.0000000000  0.45      1.8544283  0.5051638  0.8617333
##   0.0000000000  0.50      2.0444635  0.4960725  0.9237540
##   0.0000000000  0.55      2.3207614  0.4870526  1.0104655
##   0.0000000000  0.60      2.8877399  0.4839226  1.1719041
##   0.0000000000  0.65      3.4633291  0.4833160  1.3328196
##   0.0000000000  0.70      4.0890160  0.4843785  1.5067492
##   0.0000000000  0.75      4.6088013  0.4857715  1.6522424
##   0.0000000000  0.80      4.9538961  0.4892524  1.7499055
##   0.0000000000  0.85      5.4167456  0.4882967  1.8760011
##   0.0000000000  0.90      5.8685550  0.4863001  1.9983408
##   0.0000000000  0.95      6.2509458  0.4835518  2.1016244
##   0.0000000000  1.00      6.6165471  0.4808203  2.2038471
##   0.0001000000  0.05      0.7164245  0.5937625  0.5950563
##   0.0001000000  0.10      0.6572385  0.5931503  0.5292584
##   0.0001000000  0.15      0.6723377  0.6030883  0.5153848
##   0.0001000000  0.20      0.8563218  0.5944384  0.5667448
##   0.0001000000  0.25      1.0942166  0.5937401  0.6248712
##   0.0001000000  0.30      1.2671981  0.5906466  0.6702627
##   0.0001000000  0.35      1.3306518  0.5890952  0.6894903
##   0.0001000000  0.40      1.4573307  0.5833741  0.7240189
##   0.0001000000  0.45      1.6703210  0.5504478  0.7957001
##   0.0001000000  0.50      1.9051935  0.5303680  0.8663506
##   0.0001000000  0.55      2.0940884  0.5200143  0.9263136
##   0.0001000000  0.60      2.5780981  0.5118830  1.0656080
##   0.0001000000  0.65      3.1116055  0.5030835  1.2177236
##   0.0001000000  0.70      3.6828385  0.4932037  1.3803834
##   0.0001000000  0.75      4.2647934  0.4886267  1.5449731
##   0.0001000000  0.80      4.7070145  0.4833418  1.6712713
##   0.0001000000  0.85      5.0557320  0.4790780  1.7724920
##   0.0001000000  0.90      5.5019799  0.4747631  1.8972360
##   0.0001000000  0.95      5.8614243  0.4716302  1.9971163
##   0.0001000000  1.00      6.2076678  0.4697259  2.0927394
##   0.0001467799  0.05      0.7245967  0.5927038  0.6011757
##   0.0001467799  0.10      0.6627429  0.5903881  0.5329632
##   0.0001467799  0.15      0.6619466  0.6056642  0.5137748
##   0.0001467799  0.20      0.8397543  0.5937028  0.5626903
##   0.0001467799  0.25      1.0720314  0.5936926  0.6198183
##   0.0001467799  0.30      1.2492728  0.5910804  0.6656304
##   0.0001467799  0.35      1.3267368  0.5905475  0.6883110
##   0.0001467799  0.40      1.4313842  0.5851531  0.7168912
##   0.0001467799  0.45      1.6175050  0.5646512  0.7786967
##   0.0001467799  0.50      1.8456048  0.5389351  0.8478742
##   0.0001467799  0.55      2.0529205  0.5256570  0.9116374
##   0.0001467799  0.60      2.4627230  0.5162846  1.0306018
##   0.0001467799  0.65      2.9827133  0.5084161  1.1790714
##   0.0001467799  0.70      3.5259676  0.4986027  1.3334288
##   0.0001467799  0.75      4.1023654  0.4899500  1.4970378
##   0.0001467799  0.80      4.6050836  0.4854571  1.6395114
##   0.0001467799  0.85      4.9333524  0.4794612  1.7359642
##   0.0001467799  0.90      5.3499689  0.4741856  1.8533624
##   0.0001467799  0.95      5.7140669  0.4689434  1.9554350
##   0.0001467799  1.00      6.0493530  0.4659400  2.0484824
##   0.0002154435  0.05      0.7327723  0.5916631  0.6076845
##   0.0002154435  0.10      0.6693290  0.5872713  0.5378196
##   0.0002154435  0.15      0.6490334  0.6096433  0.5111934
##   0.0002154435  0.20      0.8303786  0.5932117  0.5605840
##   0.0002154435  0.25      1.0441950  0.5934397  0.6139080
##   0.0002154435  0.30      1.2258887  0.5918272  0.6594110
##   0.0002154435  0.35      1.3225572  0.5913070  0.6871218
##   0.0002154435  0.40      1.3976076  0.5864850  0.7076117
##   0.0002154435  0.45      1.5603480  0.5817190  0.7571815
##   0.0002154435  0.50      1.7747403  0.5507047  0.8262529
##   0.0002154435  0.55      2.0022962  0.5315089  0.8944341
##   0.0002154435  0.60      2.3077504  0.5212579  0.9850734
##   0.0002154435  0.65      2.8230828  0.5122337  1.1321337
##   0.0002154435  0.70      3.3190930  0.5041639  1.2734792
##   0.0002154435  0.75      3.8720433  0.4945680  1.4304335
##   0.0002154435  0.80      4.4047336  0.4868357  1.5812130
##   0.0002154435  0.85      4.7875540  0.4815670  1.6919074
##   0.0002154435  0.90      5.1490514  0.4747265  1.7958841
##   0.0002154435  0.95      5.5052148  0.4682716  1.8963170
##   0.0002154435  1.00      5.8383074  0.4638221  1.9893036
##   0.0003162278  0.05      0.7402961  0.5911471  0.6136946
##   0.0003162278  0.10      0.6763431  0.5842012  0.5425440
##   0.0003162278  0.15      0.6355428  0.6154413  0.5081219
##   0.0003162278  0.20      0.8205138  0.5933146  0.5579472
##   0.0003162278  0.25      1.0101778  0.5931847  0.6060690
##   0.0003162278  0.30      1.1975326  0.5927386  0.6520239
##   0.0003162278  0.35      1.3117537  0.5917486  0.6838742
##   0.0003162278  0.40      1.3573428  0.5884628  0.6976634
##   0.0003162278  0.45      1.5131233  0.5857125  0.7411382
##   0.0003162278  0.50      1.6995557  0.5649536  0.8028801
##   0.0003162278  0.55      1.9155975  0.5410484  0.8680097
##   0.0003162278  0.60      2.1124817  0.5273738  0.9291157
##   0.0003162278  0.65      2.6253056  0.5157199  1.0754408
##   0.0003162278  0.70      3.0957090  0.5077588  1.2098624
##   0.0003162278  0.75      3.5922247  0.4991610  1.3511591
##   0.0003162278  0.80      4.1225834  0.4903713  1.5012409
##   0.0003162278  0.85      4.5926608  0.4823625  1.6347690
##   0.0003162278  0.90      4.9222929  0.4763262  1.7308603
##   0.0003162278  0.95      5.2427207  0.4690085  1.8223490
##   0.0003162278  1.00      5.5678098  0.4637371  1.9134685
##   0.0004641589  0.05      0.7471778  0.5905727  0.6191679
##   0.0004641589  0.10      0.6738944  0.5844872  0.5431683
##   0.0004641589  0.15      0.6222986  0.6242720  0.5042106
##   0.0004641589  0.20      0.8004934  0.5942016  0.5524564
##   0.0004641589  0.25      0.9715420  0.5926913  0.5969000
##   0.0004641589  0.30      1.1652264  0.5927676  0.6441451
##   0.0004641589  0.35      1.2937281  0.5922235  0.6785470
##   0.0004641589  0.40      1.3227526  0.5901868  0.6891363
##   0.0004641589  0.45      1.4608083  0.5862401  0.7265077
##   0.0004641589  0.50      1.6202567  0.5814972  0.7752644
##   0.0004641589  0.55      1.8224156  0.5526949  0.8397866
##   0.0004641589  0.60      2.0184582  0.5333496  0.8999454
##   0.0004641589  0.65      2.3879235  0.5216788  1.0074124
##   0.0004641589  0.70      2.8525730  0.5112032  1.1405822
##   0.0004641589  0.75      3.2948609  0.5033814  1.2669383
##   0.0004641589  0.80      3.7774024  0.4950131  1.4042795
##   0.0004641589  0.85      4.2401537  0.4865239  1.5354917
##   0.0004641589  0.90      4.6751102  0.4782343  1.6590987
##   0.0004641589  0.95      4.9361734  0.4715460  1.7358408
##   0.0004641589  1.00      5.2385301  0.4656975  1.8211941
##   0.0006812921  0.05      0.7533980  0.5906116  0.6242481
##   0.0006812921  0.10      0.6676013  0.5872719  0.5417037
##   0.0006812921  0.15      0.6203827  0.6259190  0.5041319
##   0.0006812921  0.20      0.7669021  0.5961302  0.5429985
##   0.0006812921  0.25      0.9278340  0.5922698  0.5861976
##   0.0006812921  0.30      1.1279707  0.5927249  0.6352798
##   0.0006812921  0.35      1.2640211  0.5922248  0.6703494
##   0.0006812921  0.40      1.3198066  0.5911215  0.6882350
##   0.0006812921  0.45      1.4007788  0.5866049  0.7092787
##   0.0006812921  0.50      1.5459633  0.5857055  0.7505488
##   0.0006812921  0.55      1.7253898  0.5659194  0.8098939
##   0.0006812921  0.60      1.9251695  0.5428914  0.8706125
##   0.0006812921  0.65      2.1320154  0.5283738  0.9338194
##   0.0006812921  0.70      2.5801709  0.5157325  1.0629224
##   0.0006812921  0.75      2.9904224  0.5071097  1.1807943
##   0.0006812921  0.80      3.4046273  0.4995689  1.2992458
##   0.0006812921  0.85      3.8479433  0.4916187  1.4255705
##   0.0006812921  0.90      4.2521526  0.4825309  1.5404839
##   0.0006812921  0.95      4.6070903  0.4754359  1.6419168
##   0.0006812921  1.00      4.8609602  0.4695685  1.7152604
##   0.0010000000  0.05      0.7599721  0.5911200  0.6294868
##   0.0010000000  0.10      0.6637556  0.5900804  0.5419552
##   0.0010000000  0.15      0.6214868  0.6253447  0.5054438
##   0.0010000000  0.20      0.7379586  0.5982278  0.5347056
##   0.0010000000  0.25      0.8869150  0.5924181  0.5759340
##   0.0010000000  0.30      1.0803112  0.5929738  0.6237003
##   0.0010000000  0.35      1.2272001  0.5927309  0.6600795
##   0.0010000000  0.40      1.3105008  0.5917851  0.6850782
##   0.0010000000  0.45      1.3504455  0.5877953  0.6966131
##   0.0010000000  0.50      1.4888612  0.5859756  0.7344980
##   0.0010000000  0.55      1.6457073  0.5801174  0.7828400
##   0.0010000000  0.60      1.8342202  0.5540250  0.8425432
##   0.0010000000  0.65      2.0073859  0.5354154  0.8963138
##   0.0010000000  0.70      2.3566331  0.5231079  0.9976610
##   0.0010000000  0.75      2.7547298  0.5119583  1.1127957
##   0.0010000000  0.80      3.1252362  0.5044958  1.2195402
##   0.0010000000  0.85      3.5197194  0.4968957  1.3324019
##   0.0010000000  0.90      3.8953116  0.4885973  1.4398915
##   0.0010000000  0.95      4.2265954  0.4811643  1.5343714
##   0.0010000000  1.00      4.4543753  0.4751446  1.6007763
##   0.0014677993  0.05      0.7677525  0.5909378  0.6357168
##   0.0014677993  0.10      0.6603930  0.5950900  0.5434426
##   0.0014677993  0.15      0.6229387  0.6247682  0.5068955
##   0.0014677993  0.20      0.7055369  0.6006146  0.5259724
##   0.0014677993  0.25      0.8487366  0.5927871  0.5663255
##   0.0014677993  0.30      1.0297019  0.5928268  0.6116977
##   0.0014677993  0.35      1.1860839  0.5927101  0.6493698
##   0.0014677993  0.40      1.2856219  0.5921014  0.6774588
##   0.0014677993  0.45      1.3137390  0.5896117  0.6873126
##   0.0014677993  0.50      1.4258000  0.5865609  0.7164270
##   0.0014677993  0.55      1.5694890  0.5854626  0.7571547
##   0.0014677993  0.60      1.7373951  0.5668944  0.8124579
##   0.0014677993  0.65      1.9232701  0.5453975  0.8692585
##   0.0014677993  0.70      2.1351031  0.5302713  0.9332515
##   0.0014677993  0.75      2.5035619  0.5195426  1.0395878
##   0.0014677993  0.80      2.8483671  0.5100740  1.1398837
##   0.0014677993  0.85      3.1821066  0.5026790  1.2364870
##   0.0014677993  0.90      3.5298042  0.4954343  1.3365347
##   0.0014677993  0.95      3.8387931  0.4882212  1.4248221
##   0.0014677993  1.00      4.0418937  0.4821256  1.4840675
##   0.0021544347  0.05      0.7766459  0.5902713  0.6427716
##   0.0021544347  0.10      0.6604587  0.6013410  0.5466553
##   0.0021544347  0.15      0.6253381  0.6231990  0.5091199
##   0.0021544347  0.20      0.6711572  0.6055438  0.5168365
##   0.0021544347  0.25      0.8183788  0.5943489  0.5577391
##   0.0021544347  0.30      0.9767478  0.5922413  0.5990656
##   0.0021544347  0.35      1.1415316  0.5925104  0.6393178
##   0.0021544347  0.40      1.2598248  0.5923860  0.6694180
##   0.0021544347  0.45      1.3031538  0.5907757  0.6839480
##   0.0021544347  0.50      1.3557807  0.5877015  0.6976213
##   0.0021544347  0.55      1.4887915  0.5863173  0.7342103
##   0.0021544347  0.60      1.6326754  0.5812268  0.7780913
##   0.0021544347  0.65      1.8058375  0.5579280  0.8331019
##   0.0021544347  0.70      1.9638862  0.5396903  0.8821664
##   0.0021544347  0.75      2.2514684  0.5275111  0.9660834
##   0.0021544347  0.80      2.5680901  0.5177787  1.0582454
##   0.0021544347  0.85      2.8741733  0.5089987  1.1476743
##   0.0021544347  0.90      3.1672395  0.5023511  1.2330657
##   0.0021544347  0.95      3.4554398  0.4960623  1.3161608
##   0.0021544347  1.00      3.6439244  0.4900868  1.3713851
##   0.0031622777  0.05      0.7871146  0.5889321  0.6509113
##   0.0031622777  0.10      0.6665880  0.6040092  0.5524089
##   0.0031622777  0.15      0.6354424  0.6117612  0.5181184
##   0.0031622777  0.20      0.6409336  0.6139253  0.5095949
##   0.0031622777  0.25      0.7891829  0.5967348  0.5492342
##   0.0031622777  0.30      0.9239979  0.5917416  0.5864116
##   0.0031622777  0.35      1.0807635  0.5926094  0.6251195
##   0.0031622777  0.40      1.2055064  0.5925499  0.6544784
##   0.0031622777  0.45      1.2750134  0.5918375  0.6752335
##   0.0031622777  0.50      1.2995580  0.5896103  0.6834917
##   0.0031622777  0.55      1.4055117  0.5874439  0.7103415
##   0.0031622777  0.60      1.5372600  0.5862410  0.7478378
##   0.0031622777  0.65      1.6833105  0.5726352  0.7951041
##   0.0031622777  0.70      1.8478474  0.5520574  0.8457825
##   0.0031622777  0.75      2.0201860  0.5364148  0.8983168
##   0.0031622777  0.80      2.3085124  0.5262183  0.9820205
##   0.0031622777  0.85      2.5788630  0.5174551  1.0614049
##   0.0031622777  0.90      2.8428254  0.5099255  1.1389016
##   0.0031622777  0.95      3.0863311  0.5039009  1.2101578
##   0.0031622777  1.00      3.2735177  0.4985184  1.2652678
##   0.0046415888  0.05      0.7987549  0.5870544  0.6596716
##   0.0046415888  0.10      0.6773885  0.6014712  0.5623393
##   0.0046415888  0.15      0.6664055  0.5889346  0.5360607
##   0.0046415888  0.20      0.6201781  0.6266341  0.5038735
##   0.0046415888  0.25      0.7410746  0.5994122  0.5350586
##   0.0046415888  0.30      0.8663848  0.5934990  0.5709666
##   0.0046415888  0.35      1.0126305  0.5924462  0.6089403
##   0.0046415888  0.40      1.1486611  0.5927406  0.6416940
##   0.0046415888  0.45      1.2491482  0.5927497  0.6665067
##   0.0046415888  0.50      1.2817071  0.5910539  0.6777294
##   0.0046415888  0.55      1.3180805  0.5892896  0.6875967
##   0.0046415888  0.60      1.4428555  0.5876629  0.7205525
##   0.0046415888  0.65      1.5693314  0.5839316  0.7592508
##   0.0046415888  0.70      1.7159975  0.5659837  0.8055494
##   0.0046415888  0.75      1.8537713  0.5476407  0.8482717
##   0.0046415888  0.80      2.0769418  0.5340046  0.9138276
##   0.0046415888  0.85      2.3155139  0.5257192  0.9836522
##   0.0046415888  0.90      2.5455089  0.5185937  1.0516818
##   0.0046415888  0.95      2.7586280  0.5125274  1.1147546
##   0.0046415888  1.00      2.9355861  0.5069287  1.1667371
##   0.0068129207  0.05      0.8110334  0.5844760  0.6686486
##   0.0068129207  0.10      0.6896210  0.6000396  0.5724275
##   0.0068129207  0.15      0.6791438  0.5818832  0.5441548
##   0.0068129207  0.20      0.6225217  0.6251567  0.5064708
##   0.0068129207  0.25      0.6951920  0.6026596  0.5233475
##   0.0068129207  0.30      0.8135712  0.5963757  0.5561233
##   0.0068129207  0.35      0.9532286  0.5920444  0.5942386
##   0.0068129207  0.40      1.0882420  0.5928691  0.6277525
##   0.0068129207  0.45      1.1908371  0.5931549  0.6516214
##   0.0068129207  0.50      1.2478543  0.5927732  0.6669172
##   0.0068129207  0.55      1.2756479  0.5914294  0.6756553
##   0.0068129207  0.60      1.3486723  0.5897592  0.6943385
##   0.0068129207  0.65      1.4662560  0.5882797  0.7265769
##   0.0068129207  0.70      1.5927435  0.5782451  0.7683139
##   0.0068129207  0.75      1.7211350  0.5610837  0.8085330
##   0.0068129207  0.80      1.8682792  0.5452893  0.8531876
##   0.0068129207  0.85      2.0902243  0.5332969  0.9171962
##   0.0068129207  0.90      2.2917675  0.5262402  0.9765861
##   0.0068129207  0.95      2.4752899  0.5200652  1.0312661
##   0.0068129207  1.00      2.6293012  0.5149263  1.0770808
##   0.0100000000  0.05      0.8235625  0.5808799  0.6778438
##   0.0100000000  0.10      0.7039138  0.5977001  0.5834764
##   0.0100000000  0.15      0.6720808  0.5856118  0.5447896
##   0.0100000000  0.20      0.6252128  0.6234733  0.5097162
##   0.0100000000  0.25      0.6566324  0.6085207  0.5143750
##   0.0100000000  0.30      0.7665738  0.5988391  0.5419625
##   0.0100000000  0.35      0.8949871  0.5941002  0.5784361
##   0.0100000000  0.40      1.0149415  0.5926631  0.6101514
##   0.0100000000  0.45      1.1316063  0.5932771  0.6382064
##   0.0100000000  0.50      1.2152192  0.5932677  0.6582833
##   0.0100000000  0.55      1.2476214  0.5931308  0.6673950
##   0.0100000000  0.60      1.2744061  0.5918976  0.6755062
##   0.0100000000  0.65      1.3674575  0.5907183  0.6984125
##   0.0100000000  0.70      1.4829807  0.5834140  0.7353645
##   0.0100000000  0.75      1.6016470  0.5677970  0.7731160
##   0.0100000000  0.80      1.7166446  0.5524558  0.8085178
##   0.0100000000  0.85      1.8991815  0.5405114  0.8614339
##   0.0100000000  0.90      2.0803707  0.5320814  0.9142761
##   0.0100000000  0.95      2.2292327  0.5267601  0.9584236
##   0.0100000000  1.00      2.3515552  0.5222257  0.9951519
##   0.0146779927  0.05      0.8357810  0.5762259  0.6876136
##   0.0146779927  0.10      0.7189245  0.5954855  0.5948685
##   0.0146779927  0.15      0.6648247  0.5928743  0.5455902
##   0.0146779927  0.20      0.6324344  0.6156773  0.5170251
##   0.0146779927  0.25      0.6303733  0.6187771  0.5084701
##   0.0146779927  0.30      0.7250809  0.6002688  0.5311531
##   0.0146779927  0.35      0.8382764  0.5967256  0.5621737
##   0.0146779927  0.40      0.9549439  0.5937916  0.5940639
##   0.0146779927  0.45      1.0662940  0.5935159  0.6227861
##   0.0146779927  0.50      1.1536931  0.5941232  0.6434568
##   0.0146779927  0.55      1.2092620  0.5944517  0.6567778
##   0.0146779927  0.60      1.2440429  0.5939029  0.6666250
##   0.0146779927  0.65      1.2693721  0.5928124  0.6738525
##   0.0146779927  0.70      1.3782265  0.5849638  0.7053668
##   0.0146779927  0.75      1.4855205  0.5690733  0.7391371
##   0.0146779927  0.80      1.5921719  0.5562454  0.7714840
##   0.0146779927  0.85      1.7331492  0.5450088  0.8125727
##   0.0146779927  0.90      1.8875324  0.5365930  0.8569652
##   0.0146779927  0.95      2.0028235  0.5321471  0.8910178
##   0.0146779927  1.00      2.0996835  0.5286321  0.9201730
##   0.0215443469  0.05      0.8479110  0.5702861  0.6977428
##   0.0215443469  0.10      0.7344565  0.5934433  0.6074385
##   0.0215443469  0.15      0.6659116  0.6019422  0.5522435
##   0.0215443469  0.20      0.6774296  0.5849547  0.5407098
##   0.0215443469  0.25      0.6232403  0.6250442  0.5073381
##   0.0215443469  0.30      0.6934810  0.6012489  0.5244320
##   0.0215443469  0.35      0.7887192  0.5985100  0.5472854
##   0.0215443469  0.40      0.8870614  0.5960806  0.5750789
##   0.0215443469  0.45      0.9911017  0.5944007  0.6033821
##   0.0215443469  0.50      1.0910569  0.5942433  0.6287167
##   0.0215443469  0.55      1.1618349  0.5953146  0.6452715
##   0.0215443469  0.60      1.2004113  0.5957368  0.6544723
##   0.0215443469  0.65      1.2349998  0.5947028  0.6639866
##   0.0215443469  0.70      1.2680038  0.5852693  0.6778711
##   0.0215443469  0.75      1.3654598  0.5707406  0.7045911
##   0.0215443469  0.80      1.4641799  0.5581198  0.7336327
##   0.0215443469  0.85      1.5666186  0.5486686  0.7639793
##   0.0215443469  0.90      1.6902172  0.5416945  0.7990738
##   0.0215443469  0.95      1.7922766  0.5368844  0.8288359
##   0.0215443469  1.00      1.8726776  0.5340727  0.8523146
##   0.0316227766  0.05      0.8597827  0.5629103  0.7075396
##   0.0316227766  0.10      0.7506792  0.5913636  0.6206196
##   0.0316227766  0.15      0.6775730  0.6044448  0.5624449
##   0.0316227766  0.20      0.6775028  0.5846686  0.5442053
##   0.0316227766  0.25      0.6213627  0.6285109  0.5064400
##   0.0316227766  0.30      0.6569657  0.6060711  0.5171326
##   0.0316227766  0.35      0.7469450  0.5989678  0.5374627
##   0.0316227766  0.40      0.8324846  0.5984543  0.5587709
##   0.0316227766  0.45      0.9162689  0.5963467  0.5824947
##   0.0316227766  0.50      1.0142840  0.5953637  0.6091205
##   0.0316227766  0.55      1.0956619  0.5955207  0.6296820
##   0.0316227766  0.60      1.1444499  0.5966728  0.6404890
##   0.0316227766  0.65      1.1725941  0.5967191  0.6474761
##   0.0316227766  0.70      1.2034494  0.5889626  0.6598985
##   0.0316227766  0.75      1.2444456  0.5717283  0.6740658
##   0.0316227766  0.80      1.3311040  0.5603706  0.6966380
##   0.0316227766  0.85      1.4115116  0.5519664  0.7183602
##   0.0316227766  0.90      1.5078918  0.5460334  0.7461657
##   0.0316227766  0.95      1.6016818  0.5416709  0.7728913
##   0.0316227766  1.00      1.6709586  0.5386574  0.7928982
##   0.0464158883  0.05      0.8710600  0.5543198  0.7167080
##   0.0464158883  0.10      0.7670056  0.5898604  0.6342624
##   0.0464158883  0.15      0.6941294  0.6018546  0.5763423
##   0.0464158883  0.20      0.6668994  0.5915312  0.5440361
##   0.0464158883  0.25      0.6559089  0.5966230  0.5299599
##   0.0464158883  0.30      0.6244240  0.6238285  0.5079370
##   0.0464158883  0.35      0.7061429  0.5994013  0.5289703
##   0.0464158883  0.40      0.7814977  0.5993837  0.5449943
##   0.0464158883  0.45      0.8572125  0.5991528  0.5648728
##   0.0464158883  0.50      0.9308567  0.5981553  0.5853104
##   0.0464158883  0.55      1.0257555  0.5974395  0.6113205
##   0.0464158883  0.60      1.0730428  0.5972548  0.6233429
##   0.0464158883  0.65      1.1096910  0.5978645  0.6315256
##   0.0464158883  0.70      1.1393426  0.5935618  0.6416313
##   0.0464158883  0.75      1.1794944  0.5772407  0.6561125
##   0.0464158883  0.80      1.2110034  0.5627339  0.6664161
##   0.0464158883  0.85      1.2719598  0.5553363  0.6825228
##   0.0464158883  0.90      1.3456015  0.5500240  0.7013733
##   0.0464158883  0.95      1.4329031  0.5453880  0.7241749
##   0.0464158883  1.00      1.4953193  0.5426873  0.7415996
##   0.0681292069  0.05      0.8817037  0.5440658  0.7252979
##   0.0681292069  0.10      0.7835308  0.5875238  0.6476990
##   0.0681292069  0.15      0.7118640  0.5988115  0.5896119
##   0.0681292069  0.20      0.6659416  0.5999157  0.5507224
##   0.0681292069  0.25      0.6815841  0.5851085  0.5425247
##   0.0681292069  0.30      0.6238221  0.6240680  0.5111304
##   0.0681292069  0.35      0.6567414  0.6070657  0.5175289
##   0.0681292069  0.40      0.7402624  0.5990626  0.5366784
##   0.0681292069  0.45      0.8067196  0.6001459  0.5507772
##   0.0681292069  0.50      0.8714630  0.6004696  0.5678006
##   0.0681292069  0.55      0.9356576  0.6005043  0.5851993
##   0.0681292069  0.60      1.0118024  0.5995584  0.6065200
##   0.0681292069  0.65      1.0482727  0.5991260  0.6163057
##   0.0681292069  0.70      1.0829098  0.5946326  0.6281433
##   0.0681292069  0.75      1.1183520  0.5837514  0.6396675
##   0.0681292069  0.80      1.1525183  0.5700342  0.6507775
##   0.0681292069  0.85      1.1805433  0.5592896  0.6597225
##   0.0681292069  0.90      1.2123018  0.5534378  0.6680065
##   0.0681292069  0.95      1.2833723  0.5496998  0.6861547
##   0.0681292069  1.00      1.3458925  0.5465620  0.7023510
##   0.1000000000  0.05      0.8913186  0.5321494  0.7329343
##   0.1000000000  0.10      0.7994363  0.5846277  0.6602027
##   0.1000000000  0.15      0.7292866  0.5963155  0.6025383
##   0.1000000000  0.20      0.6754996  0.6057307  0.5617422
##   0.1000000000  0.25      0.6739722  0.5877633  0.5438605
##   0.1000000000  0.30      0.6531419  0.5996279  0.5280549
##   0.1000000000  0.35      0.6129639  0.6364291  0.4991091
##   0.1000000000  0.40      0.6804064  0.6035920  0.5232174
##   0.1000000000  0.45      0.7586248  0.6002633  0.5399929
##   0.1000000000  0.50      0.8263413  0.6011419  0.5553249
##   0.1000000000  0.55      0.8807686  0.6023221  0.5693816
##   0.1000000000  0.60      0.9428068  0.6016159  0.5870308
##   0.1000000000  0.65      0.9933360  0.6005464  0.6014912
##   0.1000000000  0.70      1.0303581  0.5979245  0.6143594
##   0.1000000000  0.75      1.0662874  0.5890715  0.6273687
##   0.1000000000  0.80      1.0960848  0.5771348  0.6374875
##   0.1000000000  0.85      1.1253582  0.5661179  0.6463700
##   0.1000000000  0.90      1.1471871  0.5586036  0.6528692
##   0.1000000000  0.95      1.1780382  0.5535541  0.6613094
##   0.1000000000  1.00      1.2217660  0.5506177  0.6723645
## 
## Rsquared was used to select the optimal model using the largest value.
## The final values used for the model were fraction = 0.35 and lambda = 0.1.
# Largest resampled R^2 found anywhere on the tuning grid
max(enet_model_chem[["results"]][["Rsquared"]])
## [1] 0.6364291

The Elastic Net model achieved optimal tuning parameters of fraction = 0.35 and lambda = 0.1. This resulted in a cross-validated training \(R^2\) of 0.6364291.

Part D

Predict the response for the test set. What is the value of the performance metric and how does this compare with the resampled performance metric on the training set?

# Score the test-set predictors with the tuned elastic net
test_pred <- predict(object = enet_model_chem, newdata = test_x)

# Compare the predictions with the observed test responses
# (reports RMSE, R^2 and MAE)
postResample(test_pred, test_y)
##      RMSE  Rsquared       MAE 
## 0.6749481 0.4915518 0.5484792

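The Elastic Net model achieved a test-set \(R^2\) of 0.4915518, RMSE of 0.6749481, and MAE of 0.5484792. Compared to the resampled training \(R^2\) of 0.6364291, the test-set \(R^2\) is noticeably lower (a drop of about 0.14), although the test RMSE of 0.6749481 is close to the resampled RMSE of 0.6129639. This indicates that the model generalizes reasonably well to unseen data, but that the resampled \(R^2\) was somewhat optimistic.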

Part E

Which predictors are most important in the model you have trained? Do either the biological or process predictors dominate the list?

# Rank the predictors, with importance scaled so the top one is 100
enet_imp <- varImp(object = enet_model_chem, scale = TRUE)

# Plot only the 20 highest-ranked predictors
plot(x = enet_imp, top = 20)

# Show the importance table
print(enet_imp)
## loess r-squared variable importance
## 
##   only 20 most important variables shown (out of 57)
## 
##                        Overall
## ManufacturingProcess32  100.00
## BiologicalMaterial06     97.08
## ManufacturingProcess13   91.16
## BiologicalMaterial03     82.20
## ManufacturingProcess36   82.07
## ManufacturingProcess17   75.42
## ManufacturingProcess09   74.87
## BiologicalMaterial02     74.26
## ManufacturingProcess06   68.55
## ManufacturingProcess31   68.18
## BiologicalMaterial12     67.41
## BiologicalMaterial04     56.75
## ManufacturingProcess11   52.87
## BiologicalMaterial11     52.81
## ManufacturingProcess33   52.63
## BiologicalMaterial08     51.55
## ManufacturingProcess29   46.89
## BiologicalMaterial01     45.98
## ManufacturingProcess30   42.39
## ManufacturingProcess18   34.47

The top predictors in the Elastic Net model include both Manufacturing and Biological predictors, with the top predictor being ManufacturingProcess32, followed closely by BiologicalMaterial06 and ManufacturingProcess13. Among the top 10 predictors, 7 are manufacturing and 3 are biological. This suggests that both types of predictors are important to the predictive performance, but the manufacturing process predictors dominate the top of the list.

Part F

Explore the relationships between each of the top predictors and the response. How could this information be helpful in improving yield in future runs of the manufacturing process?

library(tidyr)
library(ggplot2)

# Predictors to examine against Yield
top_vars <- c(
  "ManufacturingProcess32",
  "BiologicalMaterial06",
  "ManufacturingProcess13",
  "ManufacturingProcess17",
  "BiologicalMaterial02"
)

# Long format: one row per (observation, predictor) pair, keeping
# Yield alongside each predictor value so the plot can be faceted
chem_long <- select(chemical_imp, Yield, all_of(top_vars)) %>%
  pivot_longer(
    cols = all_of(top_vars),
    names_to = "Predictor",
    values_to = "Value"
  )

# One scatter panel per predictor, each with a fitted straight line
ggplot(data = chem_long, mapping = aes(x = Value, y = Yield)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, colour = "blue") +
  # only the x axis is freed per panel; the Yield axis stays shared
  facet_wrap(vars(Predictor), scales = "free_x") +
  labs(
    x = "Predictor",
    y = "Yield",
    title = "Relationships Between Top Predictors and Yield"
  ) +
  theme_minimal() +
  # centre the title; must follow theme_minimal() so it is not reset
  theme(plot.title = element_text(hjust = 0.5))

# Correlation of each selected predictor with Yield, computed on the
# rows where both values are present. vapply() is used instead of
# sapply() so the result is guaranteed to be a named numeric vector
# with exactly one value per predictor (an error is raised otherwise).
vapply(
  top_vars,
  function(x) {
    cor(chemical_imp[[x]], chemical_imp$Yield, use = "complete.obs")
  },
  numeric(1)
)
## ManufacturingProcess32   BiologicalMaterial06 ManufacturingProcess13 
##              0.6083321              0.4781634             -0.5036797 
## ManufacturingProcess17   BiologicalMaterial02 
##             -0.4258069              0.4815158

The scatterplots show that BiologicalMaterial02, BiologicalMaterial06, and ManufacturingProcess32 are positively correlated with yield (correlations of roughly 0.48 to 0.61), i.e., yield tends to increase with higher values of these predictors. ManufacturingProcess17 and ManufacturingProcess13 are negatively correlated with yield (roughly -0.43 and -0.50), so yield tends to decrease as they increase. These relationships indicate that both manufacturing processes and biological material are key drivers of yield, with process control offering the most opportunity for improvement in future runs, since the manufacturing process settings are controllable parameters.