Problem

In a process development study on yield, four factors were studied, each at two levels: time (A), concentration (B), pressure (C), and temperature (D). The following data was provided:

Run Number Actual Run Order A B C D Yield (lbs)
1 5 - - - - 12
2 9 + - - - 18
3 8 - + - - 13
4 13 + + - - 16
5 3 - - + - 17
6 7 + - + - 15
7 14 - + + - 20
8 1 + + + - 15
9 6 - - - + 10
10 11 + - - + 25
11 2 - + - + 13
12 15 + + - + 24
13 4 - - + + 19
14 16 + - + + 21
15 10 - + + + 17
16 12 + + + + 23


a) Display the half-normal plot for this data and determine which factors appear to be significant.
b) Pull terms that do not appear to be significant into error and test for the significance of the other effects at the 0.05 level of significance.

Setup

Setup Libraries

# Setup Libraries
library(dplyr)
library(tidyr)
library(readr)
library(knitr)
library(agricolae)
library(lawstat)
library(car)
library(GAD)
library(BSDA)
library(pwr)
library(WebPower)
library(ggplot2)
library(ggfortify)
library(ggpubr)
library(SixSigma)
library(DoE.base)

Part (a)

Display the half-normal plot for this data and determine which factors appear to be significant.

factors <- expand.grid(A = gl(2, 1, labels = c(-1, 1)),
                       B = gl(2, 1, labels = c(-1, 1)),
                       C = gl(2, 1, labels = c(-1, 1)),
                       D = gl(2, 1, labels = c(-1, 1)))

runNum <- 1:16

runNumAct <- c(5,9,8,13,3,7,14,1,6,11,2,15,4,16,10,12)

response <- c(12,18,13,16,17,15,20,15,10,25,13,24,19,21,17,23)

dat <- data.frame(runNum,runNumAct,factors,response)

indx <- sapply(dat, is.factor)
dat[indx] <- lapply(dat[indx], function(x) as.numeric(as.character(x)))

#(a)
datmodel <- lm(response~A*B*C*D,data = dat)

halfnormal(datmodel)
## 
## Significant effects (alpha=0.05, Lenth method):
## [1] A   A:C A:D D


From the half-normal plot, we can see that the main effects A and D are significant, while the interactions of AC and AD are also significant to an alpha of 0.05.

Part (b)

Pull terms that do not appear to be significant into error and test for the significance of the other effects at the 0.05 level of significance.

We need to first run a model with all effects and interactions present. We get the following results when running a one-way ANOVA.

datmodel <- lm(response~A*B*C*D,data = dat)
dataov <- aov(datmodel)
summary(dataov)
##             Df Sum Sq Mean Sq
## A            1  81.00   81.00
## B            1   1.00    1.00
## C            1  16.00   16.00
## D            1  42.25   42.25
## A:B          1   2.25    2.25
## A:C          1  72.25   72.25
## B:C          1   0.25    0.25
## A:D          1  64.00   64.00
## B:D          1   0.00    0.00
## C:D          1   0.00    0.00
## A:B:C        1   4.00    4.00
## A:B:D        1   2.25    2.25
## A:C:D        1   0.25    0.25
## B:C:D        1   2.25    2.25
## A:B:C:D      1   4.00    4.00


Because the original half-normal plot did not show the interaction ABCD as significant, we remove that from the model and re-run the analysis.

datmodel <- lm(response~A+B+C+D+
                 A*B+
                 A*C+
                 B*C+
                 A*D+
                 B*D+
                 C*D+
                 A*B*C+
                 A*B*D+
                 A*C*D+
                 B*C*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)
##             Df Sum Sq Mean Sq F value Pr(>F)
## A            1  81.00   81.00  20.250  0.139
## B            1   1.00    1.00   0.250  0.705
## C            1  16.00   16.00   4.000  0.295
## D            1  42.25   42.25  10.562  0.190
## A:B          1   2.25    2.25   0.562  0.590
## A:C          1  72.25   72.25  18.062  0.147
## B:C          1   0.25    0.25   0.062  0.844
## A:D          1  64.00   64.00  16.000  0.156
## B:D          1   0.00    0.00   0.000  1.000
## C:D          1   0.00    0.00   0.000  1.000
## A:B:C        1   4.00    4.00   1.000  0.500
## A:B:D        1   2.25    2.25   0.562  0.590
## A:C:D        1   0.25    0.25   0.062  0.844
## B:C:D        1   2.25    2.25   0.562  0.590
## Residuals    1   4.00    4.00

We now remove all of the 3rd level interactions because of their high p-values and re-run the model.

datmodel <- lm(response~A+B+C+D+
                 A*B+
                 A*C+
                 B*C+
                 A*D+
                 B*D+
                 C*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## A            1  81.00   81.00  31.765 0.00244 **
## B            1   1.00    1.00   0.392 0.55864   
## C            1  16.00   16.00   6.275 0.05416 . 
## D            1  42.25   42.25  16.569 0.00963 **
## A:B          1   2.25    2.25   0.882 0.39068   
## A:C          1  72.25   72.25  28.333 0.00313 **
## B:C          1   0.25    0.25   0.098 0.76684   
## A:D          1  64.00   64.00  25.098 0.00407 **
## B:D          1   0.00    0.00   0.000 1.00000   
## C:D          1   0.00    0.00   0.000 1.00000   
## Residuals    5  12.75    2.55                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


We now see that both the CD and BD interactions are not significant so we can remove them and re-run the model.

datmodel <- lm(response~A+B+C+D+
                 A*B+
                 A*C+
                 B*C+
                 A*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## A            1  81.00   81.00  44.471 0.000286 ***
## B            1   1.00    1.00   0.549 0.482829    
## C            1  16.00   16.00   8.784 0.020991 *  
## D            1  42.25   42.25  23.196 0.001930 ** 
## A:B          1   2.25    2.25   1.235 0.303091    
## A:C          1  72.25   72.25  39.667 0.000405 ***
## B:C          1   0.25    0.25   0.137 0.721982    
## A:D          1  64.00   64.00  35.137 0.000583 ***
## Residuals    7  12.75    1.82                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We now see that BC is insignificant, so we remove it and and re-run the model.

datmodel <- lm(response~A+B+C+D+
                 A*B+
                 A*C+
                 A*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## A            1  81.00   81.00  49.846 0.000106 ***
## B            1   1.00    1.00   0.615 0.455366    
## C            1  16.00   16.00   9.846 0.013850 *  
## D            1  42.25   42.25  26.000 0.000931 ***
## A:B          1   2.25    2.25   1.385 0.273139    
## A:C          1  72.25   72.25  44.462 0.000158 ***
## A:D          1  64.00   64.00  39.385 0.000239 ***
## Residuals    8  13.00    1.62                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We now see that AB is insignificant, so we remove it and and re-run the model.

datmodel <- lm(response~A+B+C+D+
                 A*C+
                 A*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## A            1  81.00   81.00  47.803 6.96e-05 ***
## B            1   1.00    1.00   0.590 0.462036    
## C            1  16.00   16.00   9.443 0.013292 *  
## D            1  42.25   42.25  24.934 0.000746 ***
## A:C          1  72.25   72.25  42.639 0.000108 ***
## A:D          1  64.00   64.00  37.770 0.000170 ***
## Residuals    9  15.25    1.69                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We keep the interactions AC and AD because they are significant and now look at the main effects in the model. Because B is not significant to an alpha of 0.05 and it isn’t in the interaction terms we’ve kept, we remove B from the model.

datmodel <- lm(response~A+C+D+
                 A*C+
                 A*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## A            1  81.00   81.00  49.846 3.46e-05 ***
## C            1  16.00   16.00   9.846 0.010549 *  
## D            1  42.25   42.25  26.000 0.000465 ***
## A:C          1  72.25   72.25  44.462 5.58e-05 ***
## A:D          1  64.00   64.00  39.385 9.19e-05 ***
## Residuals   10  16.25    1.62                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We now stop as all of the factors and interactions are significant and we have a hierarchical model with all lower level terms of the significant interactions still in the model. We can now check for model adequacy by looking at the residual plots.

autoplot(dataov)

We find that the residuals are both normal and of equal variance so we find the model has met the adequacy criteria.

Source Code

# Setup Libraries
library(dplyr)
library(tidyr)
library(readr)
library(knitr)
library(agricolae)
library(lawstat)
library(car)
library(GAD)
library(BSDA)
library(pwr)
library(WebPower)
library(ggplot2)
library(ggfortify)
library(ggpubr)
library(SixSigma)
library(DoE.base)

factors <- expand.grid(A = gl(2, 1, labels = c(-1, 1)),
                       B = gl(2, 1, labels = c(-1, 1)),
                       C = gl(2, 1, labels = c(-1, 1)),
                       D = gl(2, 1, labels = c(-1, 1)))

runNum <- 1:16

runNumAct <- c(5,9,8,13,3,7,14,1,6,11,2,15,4,16,10,12)

response <- c(12,18,13,16,17,15,20,15,10,25,13,24,19,21,17,23)

dat <- data.frame(runNum,runNumAct,factors,response)

indx <- sapply(dat, is.factor)
dat[indx] <- lapply(dat[indx], function(x) as.numeric(as.character(x)))

#(a)
datmodel <- lm(response~A*B*C*D,data = dat)

halfnormal(datmodel)

datmodel <- lm(response~A*B*C*D,data = dat)
dataov <- aov(datmodel)
summary(dataov)

datmodel <- lm(response~A+B+C+D+
                 A*B+
                 A*C+
                 B*C+
                 A*D+
                 B*D+
                 C*D+
                 A*B*C+
                 A*B*D+
                 A*C*D+
                 B*C*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)

datmodel <- lm(response~A+B+C+D+
                 A*B+
                 A*C+
                 B*C+
                 A*D+
                 B*D+
                 C*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)

datmodel <- lm(response~A+B+C+D+
                 A*B+
                 A*C+
                 B*C+
                 A*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)

datmodel <- lm(response~A+B+C+D+
                 A*B+
                 A*C+
                 A*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)

datmodel <- lm(response~A+B+C+D+
                 A*C+
                 A*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)

datmodel <- lm(response~A+C+D+
                 A*C+
                 A*D
               ,data = dat)
dataov <- aov(datmodel)
summary(dataov)

autoplot(dataov)