Data 624 Homework 8

Textbook: Max Kuhn and Kjell Johnson. Applied Predictive Modeling. Springer, New York, 2013.

#Load packages
library(tidyverse)
library(AppliedPredictiveModeling)
library(mlbench)
library(caret)
library(kableExtra)

Exercise 7.2

Friedman (1991) introduced several benchmark data sets created by simulation. One of these simulations used the following nonlinear equation to create data:

\[y = 10 \sin(\pi x_1 x_2) + 20(x_3 - 0.5)^2 + 10x_4 + 5x_5 + N(0, \sigma^2)\]

where the x values are random variables uniformly distributed between [0, 1] (there are also 5 other non-informative variables created in the simulation). The package mlbench contains a function called mlbench.friedman1 that simulates these data:

set.seed(200)
trainingData <- mlbench.friedman1(200, sd = 1)
## The 'x' element comes back as a matrix; turning it into a data frame
## has the side benefit of giving the columns names.
trainingData$x <- data.frame(trainingData$x)
## Plot the predictors against the response with
featurePlot(trainingData$x, trainingData$y)

## or other methods.

## The simulation returns a list holding a response vector 'y' and a
## predictor matrix 'x'. A much larger test set is simulated as well so
## that the true error rate can be estimated with good precision:
testData <- mlbench.friedman1(5000, sd = 1)
testData$x <- data.frame(testData$x)

## or other methods.

## NOTE(review): this chunk repeats the test-set simulation found a few
## lines earlier. No seed is reset in between, so this second call
## presumably draws a fresh sample and overwrites 'testData' -- confirm
## whether the repetition is intentional.
## This creates a list with a vector 'y' and a matrix
## of predictors 'x'. Also simulate a large test set to
## estimate the true error rate with good precision:
testData <- mlbench.friedman1(5000, sd = 1)
testData$x <- data.frame(testData$x)

Tune several models on these data. For example:

kNN Model

# k-nearest neighbours on centered and scaled predictors, tuned over ten
# candidate values of k with cross-validation.
set.seed(522)
knn_model <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = trainControl(method = "cv")
)

knn_model

## k-Nearest Neighbors 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  3.228481  0.6036980  2.673649
##    7  3.178096  0.6423871  2.584947
##    9  3.104596  0.6860325  2.523957
##   11  3.076279  0.7050698  2.500496
##   13  3.143129  0.6954467  2.534185
##   15  3.150320  0.7049198  2.548329
##   17  3.166626  0.7156530  2.559601
##   19  3.114165  0.7416197  2.527780
##   21  3.187294  0.7253828  2.589612
##   23  3.205232  0.7244692  2.611535
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 11.

knn_model$bestTune

##    k
## 4 11

# Predict the simulated test set with the tuned kNN model
knn_pred <- predict(knn_model, newdata = testData$x)

# Test-set RMSE, R-squared and MAE via postResample()
knn_PR <- postResample(pred = knn_pred, obs = testData$y)
knn_PR

##      RMSE  Rsquared       MAE 
## 3.0977783 0.6726827 2.4817458

We will also fit SVM, MARS, and neural network models.

SVM Model

# Radial-basis SVM on centered and scaled predictors, tuned over ten
# candidate cost values with cross-validation.
set.seed(523)
svm_model <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "svmRadial",
  preProcess = c("center", "scale"),
  tuneLength = 10,
  trControl = trainControl(method = "cv")
)

svm_model

## Support Vector Machines with Radial Basis Function Kernel 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ... 
## Resampling results across tuning parameters:
## 
##   C       RMSE      Rsquared   MAE     
##     0.25  2.497214  0.7962809  1.994198
##     0.50  2.253418  0.8142203  1.781069
##     1.00  2.074618  0.8356609  1.642539
##     2.00  1.977957  0.8483550  1.550495
##     4.00  1.908320  0.8566316  1.510192
##     8.00  1.894891  0.8598874  1.508390
##    16.00  1.897449  0.8606028  1.514757
##    32.00  1.896460  0.8608272  1.513806
##    64.00  1.896460  0.8608272  1.513806
##   128.00  1.896460  0.8608272  1.513806
## 
## Tuning parameter 'sigma' was held constant at a value of 0.06321079
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.06321079 and C = 8.

svm_model$bestTune

##        sigma C
## 6 0.06321079 8

# Predict the simulated test set with the tuned SVM
svm_pred <- predict(svm_model, newdata = testData$x)

# Test-set RMSE, R-squared and MAE via postResample()
svm_PR <- postResample(pred = svm_pred, obs = testData$y)
svm_PR

##      RMSE  Rsquared       MAE 
## 2.0116240 0.8341343 1.5603945

Multivariate Adaptive Regression Spline (MARS)

# MARS
# Seed the RNG before resampling, as the other models in this exercise do,
# so the cross-validation folds do not depend on whatever code ran earlier.
set.seed(525)
# Tuning grid: every combination of degree in 1:2 and nprune in 2:38.
grid <- expand.grid(.degree = 1:2, .nprune = 2:38)
mars_model <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "earth",
  tuneGrid = grid,
  trControl = trainControl(method = "cv", number = 10)
)

## Loading required package: earth

## Loading required package: Formula

## Loading required package: plotmo

## Loading required package: plotrix

## Loading required package: TeachingDemos

mars_model

## Multivariate Adaptive Regression Spline 
## 
## 200 samples
##  10 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 180, 180, 180, 180, 180, 180, ... 
## Resampling results across tuning parameters:
## 
##   degree  nprune  RMSE      Rsquared   MAE      
##   1        2      4.450919  0.2143516  3.6734248
##   1        3      3.830815  0.4088022  3.0984800
##   1        4      2.675243  0.7165022  2.1280381
##   1        5      2.365007  0.7816522  1.8525112
##   1        6      2.289095  0.7924202  1.8212124
##   1        7      1.789719  0.8713738  1.4351452
##   1        8      1.694092  0.8901055  1.3234340
##   1        9      1.643706  0.8977664  1.2811407
##   1       10      1.646732  0.8968699  1.2752174
##   1       11      1.607833  0.9021749  1.2575943
##   1       12      1.579022  0.9040028  1.2270389
##   1       13      1.647705  0.8946842  1.2809962
##   1       14      1.643119  0.8958301  1.2696374
##   1       15      1.630169  0.8980775  1.2668929
##   1       16      1.630169  0.8980775  1.2668929
##   1       17      1.630169  0.8980775  1.2668929
##   1       18      1.630169  0.8980775  1.2668929
##   1       19      1.630169  0.8980775  1.2668929
##   1       20      1.630169  0.8980775  1.2668929
##   1       21      1.630169  0.8980775  1.2668929
##   1       22      1.630169  0.8980775  1.2668929
##   1       23      1.630169  0.8980775  1.2668929
##   1       24      1.630169  0.8980775  1.2668929
##   1       25      1.630169  0.8980775  1.2668929
##   1       26      1.630169  0.8980775  1.2668929
##   1       27      1.630169  0.8980775  1.2668929
##   1       28      1.630169  0.8980775  1.2668929
##   1       29      1.630169  0.8980775  1.2668929
##   1       30      1.630169  0.8980775  1.2668929
##   1       31      1.630169  0.8980775  1.2668929
##   1       32      1.630169  0.8980775  1.2668929
##   1       33      1.630169  0.8980775  1.2668929
##   1       34      1.630169  0.8980775  1.2668929
##   1       35      1.630169  0.8980775  1.2668929
##   1       36      1.630169  0.8980775  1.2668929
##   1       37      1.630169  0.8980775  1.2668929
##   1       38      1.630169  0.8980775  1.2668929
##   2        2      4.450919  0.2143516  3.6734248
##   2        3      3.830815  0.4088022  3.0984800
##   2        4      2.675243  0.7165022  2.1280381
##   2        5      2.362650  0.7821262  1.8469646
##   2        6      2.353512  0.7788763  1.8558599
##   2        7      1.836903  0.8667230  1.4606448
##   2        8      1.695449  0.8911044  1.2703273
##   2        9      1.480061  0.9148698  1.1322960
##   2       10      1.384976  0.9286363  1.0674459
##   2       11      1.348503  0.9316396  1.0331269
##   2       12      1.283040  0.9384212  0.9921473
##   2       13      1.271339  0.9389152  0.9850019
##   2       14      1.234497  0.9421385  0.9688186
##   2       15      1.252860  0.9394388  0.9817668
##   2       16      1.265204  0.9382027  0.9871744
##   2       17      1.256888  0.9392198  0.9792562
##   2       18      1.256888  0.9392198  0.9792562
##   2       19      1.256888  0.9392198  0.9792562
##   2       20      1.256888  0.9392198  0.9792562
##   2       21      1.256888  0.9392198  0.9792562
##   2       22      1.256888  0.9392198  0.9792562
##   2       23      1.256888  0.9392198  0.9792562
##   2       24      1.256888  0.9392198  0.9792562
##   2       25      1.256888  0.9392198  0.9792562
##   2       26      1.256888  0.9392198  0.9792562
##   2       27      1.256888  0.9392198  0.9792562
##   2       28      1.256888  0.9392198  0.9792562
##   2       29      1.256888  0.9392198  0.9792562
##   2       30      1.256888  0.9392198  0.9792562
##   2       31      1.256888  0.9392198  0.9792562
##   2       32      1.256888  0.9392198  0.9792562
##   2       33      1.256888  0.9392198  0.9792562
##   2       34      1.256888  0.9392198  0.9792562
##   2       35      1.256888  0.9392198  0.9792562
##   2       36      1.256888  0.9392198  0.9792562
##   2       37      1.256888  0.9392198  0.9792562
##   2       38      1.256888  0.9392198  0.9792562
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 14 and degree = 2.

mars_model$bestTune

##    nprune degree
## 50     14      2

# Predict the simulated test set with the tuned MARS model
mars_pred <- predict(mars_model, newdata = testData$x)

# Test-set RMSE, R-squared and MAE via postResample()
mars_PR <- postResample(pred = mars_pred, obs = testData$y)
mars_PR

##      RMSE  Rsquared       MAE 
## 1.1409847 0.9468881 0.9050118

Neural network

# Neural network (model-averaged, method "avNNet")
set.seed(524)
# Tuning grid: 5 decay values crossed with 4 size values; bag fixed at FALSE.
neural_grid <- expand.grid(
  .decay = c(0, 0.01, 0.1, 0.5, 0.9),
  .size = c(1, 10, 15, 20),
  .bag = FALSE
)

# No trControl is supplied here, so train() uses its default resampling.
nnet_model <- train(
  x = trainingData$x,
  y = trainingData$y,
  method = "avNNet",
  tuneGrid = neural_grid,
  preProcess = c("center", "scale"),
  trace = FALSE,
  linout = TRUE,
  maxit = 500
)

## Warning: executing %dopar% sequentially: no parallel backend registered

nnet_model

## Model Averaged Neural Network 
## 
## 200 samples
##  10 predictor
## 
## Pre-processing: centered (10), scaled (10) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 200, 200, 200, 200, 200, 200, ... 
## Resampling results across tuning parameters:
## 
##   decay  size  RMSE      Rsquared   MAE     
##   0.00    1    2.549738  0.7509075  2.004835
##   0.00   10    2.866368  0.7013522  2.213792
##   0.00   15    2.621844  0.7313389  2.079292
##   0.00   20    2.531681  0.7514466  2.006906
##   0.01    1    2.510233  0.7547375  1.947588
##   0.01   10    2.715290  0.7178671  2.153340
##   0.01   15    2.382769  0.7771917  1.873671
##   0.01   20    2.242542  0.8020937  1.762573
##   0.10    1    2.497886  0.7559208  1.935479
##   0.10   10    2.467482  0.7646477  1.951694
##   0.10   15    2.227135  0.8046949  1.747027
##   0.10   20    2.169068  0.8144227  1.691132
##   0.50    1    2.535862  0.7491868  1.966092
##   0.50   10    2.253272  0.7999209  1.773413
##   0.50   15    2.165550  0.8144580  1.680169
##   0.50   20    2.166531  0.8144850  1.684386
##   0.90    1    2.565883  0.7440710  1.991535
##   0.90   10    2.216199  0.8057737  1.731018
##   0.90   15    2.166214  0.8145014  1.686501
##   0.90   20    2.165934  0.8146613  1.676838
## 
## Tuning parameter 'bag' was held constant at a value of FALSE
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 15, decay = 0.5 and bag = FALSE.

nnet_model$bestTune

##    size decay   bag
## 15   15   0.5 FALSE

# Predict the simulated test set with the tuned neural network
nnet_pred <- predict(nnet_model, newdata = testData$x)

# Test-set RMSE, R-squared and MAE via postResample()
nnet_PR <- postResample(pred = nnet_pred, obs = testData$y)
nnet_PR

##      RMSE  Rsquared       MAE 
## 1.9049449 0.8523567 1.4737367

Which models appear to give the best performance? Does MARS select the informative predictors (those named X1–X5)?

# One row per model (kNN, SVM, MARS, neural network) holding the
# test-set metrics computed by postResample().
comp <- data.frame(rbind(knn_PR, svm_PR, mars_PR, nnet_PR))
comp %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  )

	RMSE	Rsquared	MAE
knn_PR	3.097778	0.6726827	2.4817458
svm_PR	2.011624	0.8341343	1.5603945
mars_PR	1.140985	0.9468881	0.9050118
nnet_PR	1.904945	0.8523567	1.4737367

From the table above, MARS has the highest \(R^2\) and therefore explains the largest share of the variability in the response, using the informative predictors X1-X5. Its RMSE and MAE are also the lowest of the four models, so MARS gives the best test-set performance.

varImp(mars_model)

## earth variable importance
## 
##    Overall
## X1  100.00
## X4   75.24
## X2   48.74
## X5   15.53
## X3    0.00

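MARS did select the informative predictors: only X1-X5 appear in the importance list, and none of the non-informative variables (X6-X10) were used. X1 is the most important variable and X3 is the least important (its scaled importance is 0).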

Exercise 7.3

Exercise 6.3 describes data for a chemical manufacturing process. Use the same data imputation, data splitting, and pre-processing steps as before and train several nonlinear regression models.

data(ChemicalManufacturingProcess)

The matrix processPredictors contains the 57 predictors (12 describing the input biological material and 45 describing the process predictors) for the 176 manufacturing runs. yield contains the percent yield for each run.

Impute missing values

# Fit a "knnImpute" pre-processing step on every column except the first,
# then apply it to those same columns to obtain the imputed predictor set.
# NOTE(review): column 1 is presumably the response (Yield) -- confirm.
pre_process <- preProcess(
  ChemicalManufacturingProcess[, -1],
  method = "knnImpute"
)
chemical_imp <- predict(
  pre_process,
  newdata = ChemicalManufacturingProcess[, -1]
)

Remove predictors with pairwise correlation above 0.90

# Flag predictors involved in pairwise correlations above 0.90 and drop them.
correlations <- cor(chemical_imp)
highCorr <- findCorrelation(correlations, cutoff = 0.9)
# Guard the empty case: indexing with -integer(0) selects zero columns, so an
# unguarded drop would discard every predictor when nothing is flagged.
if (length(highCorr) > 0) {
  chemical_imp <- chemical_imp[, -highCorr, drop = FALSE]
}

Remove near zero variance

# Flag near-zero-variance predictors and drop them.
near0 <- nearZeroVar(chemical_imp)
# Guard the empty case: indexing with -integer(0) selects zero columns, so an
# unguarded drop would discard every predictor when nothing is flagged.
if (length(near0) > 0) {
  chemical_imp <- chemical_imp[, -near0, drop = FALSE]
}

Split

# Partition on the response with p = 0.8; the selected rows form the
# training set and all remaining rows form the test set.
set.seed(420)
trainRow <- createDataPartition(
  y = ChemicalManufacturingProcess$Yield,
  p = 0.8,
  list = FALSE
)

# Predictors
train_X <- chemical_imp[trainRow, ]
test_X <- chemical_imp[-trainRow, ]

# Response
train_y <- ChemicalManufacturingProcess$Yield[trainRow]
test_y <- ChemicalManufacturingProcess$Yield[-trainRow]

kNN model

# kNN on the chemical manufacturing data, tuned over ten candidate values
# of k. No trControl is supplied, so train() uses its default resampling.
set.seed(421)
knn_model <- train(
  x = train_X,
  y = train_y,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10
)

knn_model

## k-Nearest Neighbors 
## 
## 144 samples
##  46 predictor
## 
## Pre-processing: centered (46), scaled (46) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 144, 144, 144, 144, 144, 144, ... 
## Resampling results across tuning parameters:
## 
##   k   RMSE      Rsquared   MAE     
##    5  1.409536  0.3776307  1.137217
##    7  1.377913  0.3997948  1.111391
##    9  1.354392  0.4181553  1.096218
##   11  1.346958  0.4284230  1.101977
##   13  1.330118  0.4492008  1.092717
##   15  1.336551  0.4447446  1.103701
##   17  1.343605  0.4423617  1.107250
##   19  1.346562  0.4434431  1.109906
##   21  1.351739  0.4440656  1.112934
##   23  1.355510  0.4488470  1.117200
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 13.

MARS

# MARS on the chemical manufacturing data.
set.seed(1)
# Tuning grid: every combination of degree in 1:2 and nprune in 2:10.
mars_grid <- expand.grid(.degree = 1:2, .nprune = 2:10)
mars_model <- train(
  x = train_X,
  y = train_y,
  method = "earth",
  tuneGrid = mars_grid,
  preProcess = c("center", "scale")
)

mars_model

## Multivariate Adaptive Regression Spline 
## 
## 144 samples
##  46 predictor
## 
## Pre-processing: centered (46), scaled (46) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 144, 144, 144, 144, 144, 144, ... 
## Resampling results across tuning parameters:
## 
##   degree  nprune  RMSE      Rsquared   MAE     
##   1        2      1.474240  0.3910106  1.172063
##   1        3      1.460509  0.4932852  1.069873
##   1        4      1.295495  0.5303977  1.018799
##   1        5      1.466921  0.5071752  1.060716
##   1        6      1.468971  0.5102403  1.061393
##   1        7      1.496935  0.5110456  1.058454
##   1        8      1.530576  0.5043556  1.070192
##   1        9      1.567819  0.4944717  1.091810
##   1       10      1.585640  0.4868699  1.103259
##   2        2      1.474240  0.3910106  1.172063
##   2        3      1.375036  0.4630789  1.084824
##   2        4      1.366646  0.4916712  1.074731
##   2        5      1.369570  0.4907674  1.072225
##   2        6      1.488398  0.4376590  1.132782
##   2        7      1.480439  0.4498945  1.124263
##   2        8      1.512448  0.4337287  1.144685
##   2        9      1.511464  0.4358332  1.139535
##   2       10      1.551652  0.4223434  1.160382
## 
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 4 and degree = 1.

SVM

# Radial-basis SVM on the chemical manufacturing data, tuned over ten
# candidate cost values.
set.seed(424)
svm_model <- train(
  x = train_X,
  y = train_y,
  method = "svmRadial",
  tuneLength = 10,
  preProcess = c("center", "scale")
)
svm_model

## Support Vector Machines with Radial Basis Function Kernel 
## 
## 144 samples
##  46 predictor
## 
## Pre-processing: centered (46), scaled (46) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 144, 144, 144, 144, 144, 144, ... 
## Resampling results across tuning parameters:
## 
##   C       RMSE      Rsquared   MAE      
##     0.25  1.381268  0.4446768  1.1241875
##     0.50  1.315707  0.4698946  1.0646536
##     1.00  1.267987  0.4947014  1.0240494
##     2.00  1.244078  0.5110422  1.0075866
##     4.00  1.231350  0.5211678  0.9965781
##     8.00  1.222248  0.5273994  0.9877924
##    16.00  1.221956  0.5276342  0.9875321
##    32.00  1.221956  0.5276342  0.9875321
##    64.00  1.221956  0.5276342  0.9875321
##   128.00  1.221956  0.5276342  0.9875321
## 
## Tuning parameter 'sigma' was held constant at a value of 0.01645132
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.01645132 and C = 16.

Neural Network

# Neural network (model-averaged, method "avNNet") on the chemical data.
set.seed(425)
# Tuning grid: 3 decay values crossed with 4 size values; bag fixed at FALSE.
neural_grid <- expand.grid(
  .decay = c(0, 0.01, 0.1),
  .size = c(1, 10, 15, 20),
  .bag = FALSE
)

nnet_model <- train(
  x = train_X,
  y = train_y,
  method = "avNNet",
  tuneGrid = neural_grid,
  preProcess = c("center", "scale"),
  trace = FALSE,
  linout = TRUE,
  maxit = 500
)
nnet_model

## Model Averaged Neural Network 
## 
## 144 samples
##  46 predictor
## 
## Pre-processing: centered (46), scaled (46) 
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 144, 144, 144, 144, 144, 144, ... 
## Resampling results across tuning parameters:
## 
##   decay  size  RMSE       Rsquared    MAE     
##   0.00    1     1.551880  0.32561407  1.267736
##   0.00   10    11.402856  0.06924055  6.467790
##   0.00   15     3.245216  0.13853026  2.196686
##   0.00   20     2.169981  0.22106320  1.637850
##   0.01    1     1.523509  0.40201287  1.203582
##   0.01   10     2.832644  0.20785194  2.025772
##   0.01   15     2.303925  0.24294869  1.639688
##   0.01   20     2.191011  0.22904556  1.557309
##   0.10    1     1.729906  0.36008637  1.272695
##   0.10   10     1.894456  0.27469643  1.436813
##   0.10   15     2.113697  0.26317723  1.409187
##   0.10   20     2.018267  0.27793686  1.394605
## 
## Tuning parameter 'bag' was held constant at a value of FALSE
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 1, decay = 0.01 and bag = FALSE.

(a) Which non-linear regression model gives the optimal resampling and test set performance?

# Held-out predictions from each of the four tuned models
svm_pred <- predict(svm_model, newdata = test_X)
mars_pred <- predict(mars_model, newdata = test_X)
knn_pred <- predict(knn_model, newdata = test_X)
nnet_pred <- predict(nnet_model, newdata = test_X)

# Test-set RMSE, R-squared and MAE for each model
svm_rs <- postResample(pred = svm_pred, obs = test_y)
mars_rs <- postResample(pred = mars_pred, obs = test_y)
knn_rs <- postResample(pred = knn_pred, obs = test_y)
nnet_rs <- postResample(pred = nnet_pred, obs = test_y)

# One row per model for side-by-side comparison
comp1 <- data.frame(rbind(svm_rs, mars_rs, knn_rs, nnet_rs))
comp1 %>%
  kable() %>%
  kable_styling()

	RMSE	Rsquared	MAE
svm_rs	1.114729	0.7043145	0.9095641
mars_rs	1.243172	0.6172914	0.9682900
knn_rs	1.643258	0.3571572	1.2951116
nnet_rs	1.602106	0.3688266	1.2949973

The SVM model is the optimal model. On the test set it has the highest \(R^2\) (about 70%) and the lowest RMSE and MAE, and it also has the lowest resampled RMSE (about 1.22) of the four models.

(b) Which predictors are most important in the optimal nonlinear regression model? Do either the biological or process variables dominate the list? How do the top ten important predictors compare to the top ten predictors from the optimal linear model?

# Variable importance for the SVM model; the outer parentheses print it.
(svm_imp <- varImp(svm_model))

## loess r-squared variable importance
## 
##   only 20 most important variables shown (out of 46)
## 
##                        Overall
## ManufacturingProcess32  100.00
## BiologicalMaterial06     97.23
## ManufacturingProcess13   85.70
## BiologicalMaterial03     81.56
## ManufacturingProcess36   76.57
## ManufacturingProcess17   63.98
## ManufacturingProcess09   62.34
## ManufacturingProcess33   51.74
## BiologicalMaterial08     49.36
## BiologicalMaterial01     48.71
## BiologicalMaterial11     45.32
## ManufacturingProcess06   43.50
## ManufacturingProcess11   41.74
## ManufacturingProcess02   39.72
## BiologicalMaterial09     34.20
## ManufacturingProcess30   32.47
## ManufacturingProcess20   29.05
## ManufacturingProcess12   28.82
## ManufacturingProcess35   24.44
## ManufacturingProcess10   23.52

# PLS serves as the linear reference model for the importance comparison.
# Seed the RNG before resampling, as the other models in this exercise do,
# so the cross-validation folds do not depend on whatever code ran earlier.
set.seed(426)
model_pls <- train(
  x = train_X,
  y = train_y,
  method = "pls",
  metric = "RMSE",
  tuneLength = 20,
  trControl = trainControl(method = "cv")
)
# The outer parentheses print the importance table after assigning it.
(pls_imp <- varImp(model_pls))

## pls variable importance
## 
##   only 20 most important variables shown (out of 46)
## 
##                        Overall
## ManufacturingProcess32  100.00
## ManufacturingProcess36   82.94
## ManufacturingProcess13   77.78
## ManufacturingProcess09   71.26
## BiologicalMaterial06     67.87
## ManufacturingProcess17   65.54
## ManufacturingProcess33   63.92
## BiologicalMaterial03     63.18
## BiologicalMaterial08     62.64
## BiologicalMaterial01     57.74
## BiologicalMaterial11     52.39
## ManufacturingProcess06   52.29
## ManufacturingProcess11   51.33
## ManufacturingProcess12   47.71
## ManufacturingProcess28   46.37
## ManufacturingProcess04   42.18
## ManufacturingProcess02   38.43
## BiologicalMaterial10     36.00
## ManufacturingProcess10   35.36
## ManufacturingProcess30   29.08

# Top-ten importance plots for the SVM and PLS models, drawn side by side.
p1 <- plot(svm_imp, top = 10, main = "SVM")
p2 <- plot(pls_imp, top = 10, main = "PLS")
gridExtra::grid.arrange(p1, p2, ncol = 2)

ManufacturingProcess32 is the most important predictor in both models. The SVM model has 2 biological material predictors in its top 5, compared to only 1 in the PLS model. Even though the ranks of the variables have changed, the two top-ten lists contain the same predictors.

(c) Explore the relationships between the top predictors and the response for the predictors that are unique to the optimal nonlinear regression model. Do these plots reveal intuition about the biological or process predictors and their relationship with yield?

# Scatter plots of Yield against the ten predictors ranked highest by the
# SVM importance scores, each with a least-squares line overlaid.
temp <- svm_imp$importance
temp$predictor <- row.names(temp)
temp <- temp[order(temp$Overall, decreasing = TRUE), ]
# head() copes with fewer than ten predictors, where temp[1:10, ] would pad
# the result with NA rows and break the column lookup below.
temp_v <- row.names(head(temp, 10))

# par() returns the previous settings; keep them so the layout can be
# restored once the grid of plots has been drawn.
old_par <- par(mfrow = c(5, 2))

# The response is the same for every panel, so it is extracted once.
y <- ChemicalManufacturingProcess$Yield
for (i in seq_along(temp_v)) {
  x <- ChemicalManufacturingProcess[, temp_v[i]]
  plot(x, y, xlab = temp_v[i], ylab = "Yield")
  abline(lm(y ~ x))
}

par(old_par)

The plots show linear relationships between the biological and process predictors and yield.