Ho: \(\alpha_{i} = 0\) for all \(i\) - Null Hypothesis
Ha: \(\alpha_{i} \ne 0\) for at least one \(i\) - Alternative Hypothesis
Ho: \(\beta_{j} = 0\) for all \(j\) - Null Hypothesis
Ha: \(\beta_{j} \ne 0\) for at least one \(j\) - Alternative Hypothesis
Ho: \(\alpha \beta_{ij} = 0\) for all \(i, j\) - Null Hypothesis
Ha: \(\alpha \beta_{ij} \ne 0\) for at least one \(i, j\) - Alternative Hypothesis
Model Equation
\(y_{ijk} = \mu + \alpha_{i} + \beta_j + \alpha \beta_{ij} + \epsilon_{ijk}\)
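Here \(\mu\) is the overall mean, \(\alpha_{i}\) is the effect of culture medium \(i\) (\(i = 1, 2\)), \(\beta_{j}\) is the effect of time \(j\) (\(j = 1, 2\)), \(\alpha \beta_{ij}\) is their interaction, and \(\epsilon_{ijk}\) is the random error for replicate \(k\) (\(k = 1, \dots, 6\)).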
time<-c(rep(1,12),rep(2,12))
culture<-c(rep(1:2, each = 6),rep(1:2, each = 6))
response1<-c(21,22,23,28,20,26,25,26,24,25,29,27,37,39,38,38,35,36,31,34,29,33,30,35)
#length(response)
data.frame(culture,time,response1)
## culture time response1
## 1 1 1 21
## 2 1 1 22
## 3 1 1 23
## 4 1 1 28
## 5 1 1 20
## 6 1 1 26
## 7 2 1 25
## 8 2 1 26
## 9 2 1 24
## 10 2 1 25
## 11 2 1 29
## 12 2 1 27
## 13 1 2 37
## 14 1 2 39
## 15 1 2 38
## 16 1 2 38
## 17 1 2 35
## 18 1 2 36
## 19 2 2 31
## 20 2 2 34
## 21 2 2 29
## 22 2 2 33
## 23 2 2 30
## 24 2 2 35
culture<-as.factor(culture)
time<-as.factor(time)
m<-aov(response1~culture*time)
summary(m)
## Df Sum Sq Mean Sq F value Pr(>F)
## culture 1 9.4 9.4 1.835 0.190617
## time 1 590.0 590.0 115.506 9.29e-10 ***
## culture:time 1 92.0 92.0 18.018 0.000397 ***
## Residuals 20 102.2 5.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.05 indicate significant model terms. In this case, time (B) and the culture-by-time interaction (AB) are significant model terms.
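The same p-values can be pulled out programmatically if needed (a minimal sketch; summary() on an aov object returns a list whose first element is the ANOVA table):
summary(m)[[1]][["Pr(>F)"]]   # p-values for culture, time and culture:time (NA for Residuals)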
plot(m)
There is nothing unusual about the plots. The normal probability plot is approximately a straight line, with one extreme value in the tail of the distribution; otherwise the residuals look fairly normal.
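If a formal check is wanted, a normality test on the residuals can supplement the plots (a sketch using base stats functions):
shapiro.test(residuals(m))   # Shapiro-Wilk test of normality of the residuals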
interaction.plot(culture,time,response1,fun="mean",type = "b", col = 5:7, main ="Interaction Plot", ylab = "Virus response", xlab = "Culture medium", trace.label = "Time", lwd = 3, lty = 1, ylim = c(20,40), pch = c(4,2))
From the plot, virus growth increases with time in both culture media, but the increase is much larger in culture medium 1 than in medium 2. At the shorter time, medium 2 gives slightly higher responses, while at the longer time medium 1 is clearly higher; the crossing traces reflect the significant culture-by-time interaction.
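The cell means behind the plot bear this out (a quick numerical check):
tapply(response1, list(culture = culture, time = time), mean)
# culture 1 rises from about 23.3 at time 1 to 37.2 at time 2; culture 2 rises only from 26.0 to 32.0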
Ho: \(\alpha_{i} = 0\) for all \(i\) - Null Hypothesis
Ha: \(\alpha_{i} \ne 0\) for at least one \(i\) - Alternative Hypothesis
Ho: \(\beta_{j} = 0\) for all \(j\) - Null Hypothesis
Ha: \(\beta_{j} \ne 0\) for at least one \(j\) - Alternative Hypothesis
Ho: \(\alpha \beta_{ij} = 0\) for all \(i, j\) - Null Hypothesis
Ha: \(\alpha \beta_{ij} \ne 0\) for at least one \(i, j\) - Alternative Hypothesis
Model Equation
\(y_{ijk} = \mu + \alpha_{i} + \beta_j + \alpha \beta_{ij} + \epsilon_{ijk}\)
#a) Estimate the factor effects.
library(DoE.base)
## Loading required package: grid
## Loading required package: conf.design
## Registered S3 method overwritten by 'DoE.base':
## method from
## factorize.factor conf.design
##
## Attaching package: 'DoE.base'
## The following objects are masked from 'package:stats':
##
## aov, lm
## The following object is masked from 'package:graphics':
##
## plot.design
## The following object is masked from 'package:base':
##
## lengths
A<-c(-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1)
B<-c(-1,-1,+1,+1,-1,-1,+1,+1,-1,-1,+1,+1,-1,-1,+1,+1)
response<-c(14.037,13.880,14.821,14.888,16.165,13.860,14.757,14.921,13.972,14.032,14.843,14.415,13.907,13.914,14.878,14.932)
flowrate<-c(rep(seq(1:2),8))
dtime<-c(rep(rep(1:2, each = 2),4))
A<-as.factor(A)
B<-as.factor(B)
flowrate<-as.factor(flowrate)
dtime<-as.factor(dtime)
Att<-data.frame(A,B,response)
#data.frame(response,flowrate,dtime)
#b) Conduct an analysis of variance. Which factors are important?
model<-aov(response~A*B)
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 0.403 0.4026 1.262 0.2833
## B 1 1.374 1.3736 4.305 0.0602 .
## A:B 1 0.317 0.3170 0.994 0.3386
## Residuals 12 3.828 0.3190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.05 indicate significant model terms. No factor is significant at the 5% level, although B is marginal (p = 0.060).
#a) estimate the factor effects
SSA<-0.4026   # sums of squares taken from the ANOVA table above
SSB<-1.374
SSAB<-0.317
n<-2^2*4      # total number of observations: 2^2 treatment combinations x 4 replicates
effA<-(sqrt(SSA*n)*2)/n   # |effect| = 2*sqrt(SS/n); this recovers the magnitude but not the sign
effB<-(sqrt(SSB*n)*2)/n
effAB<-(sqrt(SSAB*n)*2)/n
effect<-c(effA,effB,effAB)
source<-c("A","B","AB")
data.frame(source,effect)
## source effect
## 1 A 0.3172538
## 2 B 0.5860887
## 3 AB 0.2815138
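Because these magnitudes are recovered from sums of squares, the signs of the effects are lost. A quick sign-preserving check (a sketch using the +/-1 codings recovered from the factors):
Anum<-as.numeric(as.character(A))   # recover the +/-1 coding from the factor
Bnum<-as.numeric(as.character(B))
c(A = mean(response[Anum == 1]) - mean(response[Anum == -1]),
  B = mean(response[Bnum == 1]) - mean(response[Bnum == -1]),
  AB = mean(response[Anum*Bnum == 1]) - mean(response[Anum*Bnum == -1]))
# A ~ -0.317, B ~ +0.586, AB ~ +0.282
This shows the A effect is negative, which is consistent with the negative sign on the A term in the regression equation below.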
#c) Write down a regression equation that could be used to predict epitaxial layer thickness over the region of arsenic flow rate and deposition time used in this experiment.
f1<-lm(response~A*B, data = Att)
summary(f1)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.52025 0.2824181 51.4140118 1.925139e-15
## A1 -0.59875 0.3993996 -1.4991253 1.596813e-01
## B1 0.30450 0.3993996 0.7623944 4.605484e-01
## A1:B1 0.56300 0.5648363 0.9967490 3.385615e-01
#coded factors
effect/2
## [1] 0.1586269 0.2930444 0.1407569
\(y_{i} = \beta_{0} + \beta_{1}x_{i1} + \beta_{2}x_{i2} + \beta_{3}x_{i1}x_{i2} +\epsilon_{i}\)
\(Thickness = {14.5} - 0.16x_{i1} + 0.29x_{i2} + 0.14x_{i1}x_{i2}\)
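As a quick check of the fitted equation, the predicted thickness with both coded factors at their high levels is
\(\hat{y}(x_{i1}=+1, x_{i2}=+1) = 14.5 - 0.16 + 0.29 + 0.14 \approx 14.77\)
which is close to the observed average of about 14.79 for the four runs with A and B both high.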
#d) Analyze the residuals. Are there any residuals that should cause concern?
plot(model,(1:2))
The residuals do not look entirely normal: one apparent outlier stands out on both the normal probability plot and the residuals-versus-fitted plot.
#e) Discuss how you might deal with the potential outlier found in part (d).
One way to deal with the outlier is to replace it with the average of the remaining observations in its cell and refit the model. If the ANOVA results change substantially from those obtained with the original value, a log transformation of the response is another option to consider.
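A minimal sketch of the first option, assuming the outlier is the run with the largest standardized residual (run 5, 16.165, in this data):
out<-which.max(abs(rstandard(f1)))   # run with the largest standardized residual
samecell<-Att$A == Att$A[out] & Att$B == Att$B[out]   # runs in the same (A, B) cell
Att2<-Att
Att2$response[out]<-mean(Att$response[samecell & seq_len(nrow(Att)) != out])   # replace with the cell mean of the other runs
summary(aov(response~A*B, data = Att2))   # compare with the original ANOVA above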
#6.21
A<-c(rep(c(-1,1), 56))
B<-c(rep(c(-1,-1,+1,+1), 28))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),14))
D<-c(rep(c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1),7))
response<-c(10,0,4,0,0,5,6.5,16.5,4.5,19.5,15,41.5,8,21.5,0,18,
18,16.5,6,10,0,20.5,18.5,4.5,18,18,16,39,4.5,10.5,0,5,
14,4.5,1,34,18.5,18,7.5,0,14.5,16,8.5,6.5,6.5,6.5,0,7,
12.5,17.5,14.5,11,19.5,20,6,23.5,10,5.5,0,3.5,10,0,4.5,10,
19,20.5,12,25.5,16,29.5,0,8,0,10,0.5,7,13,15.5,1,32.5,
16,17.5,14,21.5,15,19,10,8,17.5,7,9,8.5,41,24,4,18.5,
18.5,33,5,0,11,10,0,8,6,36,3,36,14,16,6.5,8)
length(response)
## [1] 112
dat<-data.frame(A,B,C,D,response)
dat[1:20,]
## A B C D response
## 1 -1 -1 -1 -1 10.0
## 2 1 -1 -1 -1 0.0
## 3 -1 1 -1 -1 4.0
## 4 1 1 -1 -1 0.0
## 5 -1 -1 1 -1 0.0
## 6 1 -1 1 -1 5.0
## 7 -1 1 1 -1 6.5
## 8 1 1 1 -1 16.5
## 9 -1 -1 -1 1 4.5
## 10 1 -1 -1 1 19.5
## 11 -1 1 -1 1 15.0
## 12 1 1 -1 1 41.5
## 13 -1 -1 1 1 8.0
## 14 1 -1 1 1 21.5
## 15 -1 1 1 1 0.0
## 16 1 1 1 1 18.0
## 17 -1 -1 -1 -1 18.0
## 18 1 -1 -1 -1 16.5
## 19 -1 1 -1 -1 6.0
## 20 1 1 -1 -1 10.0
pmod<-lm(response~A*B*C*D, data = dat)
coef(pmod)
## (Intercept) A B C D A:B
## 12.2991071 2.8616071 -1.8616071 -1.1383929 -0.1116071 1.3973214
## A:C B:C A:D B:D C:D A:B:C
## -0.3258929 -1.0133929 0.9151786 0.7098214 -0.1205357 -0.2544643
## A:B:D A:C:D B:C:D A:B:C:D
## 1.0044643 -0.5937500 -0.5491071 0.9241071
#a)Analyze the data from this experiment. Which factors significantly affect putting performance?
halfnormal(pmod, col = "blue", cex = -1, pch = 16)
## Warning in halfnormal.lm(pmod, col = "blue", cex = -1, pch = 16): halfnormal not
## recommended for models with more residual df than model df
##
## Significant effects (alpha=0.05, Lenth method):
## [1] A
From the half-normal plot, factor A appears to be the only effect labelled as significant; however, since the points are fairly clustered, we also run an ANOVA to confirm.
golf<-aov(response~A*B*C*D, data = dat)
summary(golf)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 917 917.1 10.588 0.00157 **
## B 1 388 388.1 4.481 0.03686 *
## C 1 145 145.1 1.676 0.19862
## D 1 1 1.4 0.016 0.89928
## A:B 1 219 218.7 2.525 0.11538
## A:C 1 12 11.9 0.137 0.71178
## B:C 1 115 115.0 1.328 0.25205
## A:D 1 94 93.8 1.083 0.30066
## B:D 1 56 56.4 0.651 0.42159
## C:D 1 2 1.6 0.019 0.89127
## A:B:C 1 7 7.3 0.084 0.77294
## A:B:D 1 113 113.0 1.305 0.25623
## A:C:D 1 39 39.5 0.456 0.50121
## B:C:D 1 34 33.8 0.390 0.53386
## A:B:C:D 1 96 95.6 1.104 0.29599
## Residuals 96 8316 86.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.0500 indicate model terms are significant. In this case A and B are significant model terms.
New model with factors A and B only
golf2<-aov(response~A+B, data = dat)
summary(golf2)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 917 917.1 10.809 0.00136 **
## B 1 388 388.1 4.574 0.03469 *
## Residuals 109 9249 84.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.05 indicate significant model terms. In this case, our assumption was correct: factors A and B are the only significant model terms.
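As an additional check, the terms dropped from the full factorial can be tested jointly with a partial F-test (a sketch; aov objects inherit from lm, so anova() can compare the nested fits):
anova(golf2, golf)   # joint test of all interaction terms dropped from the full model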
#(b) Analyze the residuals from this experiment. Are there any indications of model inadequacy?
plot(golf2,(1:2))
The residuals do not look normal: an outlier shows up on both the normal probability plot and the residuals-versus-fitted plot, and the extreme values on the normal probability plot do not fall on a straight line. A log transformation of the observations can help correct this.
#log transformation
dat$nresponse<-log(dat$response+1)
golf3<-aov(nresponse~A+B, data = dat)
summary(golf3)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 7.37 7.373 7.603 0.00683 **
## B 1 6.98 6.976 7.193 0.00846 **
## Residuals 109 105.71 0.970
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(golf3,(1:2))
#sqrt transformation
#dat$sreponse<-sqrt(dat$response)
#golf4<-aov(nresponse~A+B, data = dat)
#summary(golf4)
#plot(golf4,(1:2))
interaction.plot(A,B,response,fun="mean",type = "b", col = 5:7, main ="Interaction Plot", ylab = "distance", xlab = "length of putt", trace.label = "Type of putter", lwd = 3, lty = 1, pch = c(4,2))
#6.36
A<-c(rep(c(-1,1), 8))
B<-c(rep(c(-1,-1,+1,+1), 4))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),2))
D<-c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1)
resistivity<-c(1.92,11.28,1.09,5.75,2.13,9.53,1.03,5.35,1.60,11.73,1.16,4.68,2.16,9.11,1.07,5.30)
#(a) Estimate the factor effects. Plot the effect estimates on a normal probability plot and select a tentative model.
#?DoE.base
dat1<-data.frame(A,B,C,D,resistivity)
head(dat1)
## A B C D resistivity
## 1 -1 -1 -1 -1 1.92
## 2 1 -1 -1 -1 11.28
## 3 -1 1 -1 -1 1.09
## 4 1 1 -1 -1 5.75
## 5 -1 -1 1 -1 2.13
## 6 1 -1 1 -1 9.53
library(DoE.base)
mod<-lm(resistivity~A*B*C*D, data = dat1)
coef(mod)
## (Intercept) A B C D A:B
## 4.680625 3.160625 -1.501875 -0.220625 -0.079375 -1.069375
## A:C B:C A:D B:D C:D A:B:C
## -0.298125 0.229375 -0.056875 -0.046875 0.029375 0.344375
## A:B:D A:C:D B:C:D A:B:C:D
## -0.096875 -0.010625 0.094375 0.141875
halfnormal(mod, col = "red", cex = -1, pch = 16)
##
## Significant effects (alpha=0.05, Lenth method):
## [1] A B A:B A:B:C
From the plot, factors A, B, and AB are significant model terms (ABC is also labelled, but it is dropped from the tentative model), so we proceed with A, B, and AB.
dat1$AB<-A*B
#dat1$ABC<-A*B*C
dat1
## A B C D resistivity AB
## 1 -1 -1 -1 -1 1.92 1
## 2 1 -1 -1 -1 11.28 -1
## 3 -1 1 -1 -1 1.09 -1
## 4 1 1 -1 -1 5.75 1
## 5 -1 -1 1 -1 2.13 1
## 6 1 -1 1 -1 9.53 -1
## 7 -1 1 1 -1 1.03 -1
## 8 1 1 1 -1 5.35 1
## 9 -1 -1 -1 1 1.60 1
## 10 1 -1 -1 1 11.73 -1
## 11 -1 1 -1 1 1.16 -1
## 12 1 1 -1 1 4.68 1
## 13 -1 -1 1 1 2.16 1
## 14 1 -1 1 1 9.11 -1
## 15 -1 1 1 1 1.07 -1
## 16 1 1 1 1 5.30 1
model2<-aov(resistivity~A+B+AB, data = dat1)
summary(model2)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 159.83 159.83 333.09 4.05e-10 ***
## B 1 36.09 36.09 75.21 1.63e-06 ***
## AB 1 18.30 18.30 38.13 4.76e-05 ***
## Residuals 12 5.76 0.48
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.0500 indicate model terms are significant. In this case all the factors in the model are significant model terms.
plot(model2,(1:2))
The residuals do not look normal: outliers show up on both the normal probability plot and the residuals-versus-fitted plot, and the normal probability plot does not fall on a straight line. A log transformation of the observations can help correct this.
#(c) Repeat the analysis from parts (a) and (b) using ln (y) as the response variable. Is there an indication that the transformation has been useful?
mod<-lm(log(resistivity)~A*B*C*D, data = dat1)
coef(mod)
## (Intercept) A B C D A:B
## 1.185417116 0.812870345 -0.314277554 -0.006408558 -0.018077390 -0.024684570
## A:C B:C A:D B:D C:D A:B:C
## -0.039723700 -0.004225796 -0.009578245 0.003708723 0.017780432 0.063434408
## A:B:D A:C:D B:C:D A:B:C:D
## -0.029875960 -0.003740235 0.003765760 0.031322043
halfnormal(mod, col = "red", cex = -1, pch = 16)
##
## Significant effects (alpha=0.05, Lenth method):
## [1] A B A:B:C
lnmodel<-aov(log(resistivity)~A+B, data = dat1)
summary(lnmodel)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 10.572 10.572 962.9 1.41e-13 ***
## B 1 1.580 1.580 143.9 2.09e-08 ***
## Residuals 13 0.143 0.011
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(lnmodel,(1:2))
#(d) Fit a model in terms of the coded variables that can be used to predict the resistivity.
coef(mod)
## (Intercept) A B C D A:B
## 1.185417116 0.812870345 -0.314277554 -0.006408558 -0.018077390 -0.024684570
## A:C B:C A:D B:D C:D A:B:C
## -0.039723700 -0.004225796 -0.009578245 0.003708723 0.017780432 0.063434408
## A:B:D A:C:D B:C:D A:B:C:D
## -0.029875960 -0.003740235 0.003765760 0.031322043
# note: the values below are the coded regression coefficients from the log-scale model (the names SSA and SSB are reused from earlier; they are not sums of squares)
intercept<-1.185
SSA<-0.812
SSB<-0.314
effect<-c(intercept,SSA,SSB)
source<-c("intercept","A","B")
data.frame(source,effect)
## source effect
## 1 intercept 1.185
## 2 A 0.812
## 3 B 0.314
\(y_{i} = \beta_{0} + \beta_{1}x_{i1} + \beta_{2}x_{i2} +\epsilon_{i}\)
\(\ln(Resistivity) = 1.185 + 0.812x_{i1} - 0.314x_{i2}\)
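Because this model is on the log scale, predictions must be exponentiated to return to the resistivity scale. A minimal sketch (the helper name predict_resistivity is illustrative, not part of the original analysis):
predict_resistivity<-function(x1, x2) exp(1.185 + 0.812*x1 - 0.314*x2)
predict_resistivity(+1, -1)   # about 10.1, close to the observed cell average of roughly 10.4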
#6.39
A<-c(rep(c(-1,1), 16))
B<-c(rep(c(-1,-1,+1,+1), 8))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),4))
D<-c(rep(c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1),2))
E<-c(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1)
y<-c(8.11,5.56,5.77,5.82,9.17,7.8,3.23,5.69,8.82,14.23,9.2,8.94,8.68,
11.49,6.25,9.12,7.93,5,7.47,12,9.86,3.65,6.4,11.61,12.43,17.55,
8.87,25.38,13.06,18.85,11.78,26.05)
length(y)
## [1] 32
#a)Analyze the data from this experiment. Identify the significant factors and interactions.
dat2<-data.frame(A,B,C,D,E,y)
head(dat2)
## A B C D E y
## 1 -1 -1 -1 -1 -1 8.11
## 2 1 -1 -1 -1 -1 5.56
## 3 -1 1 -1 -1 -1 5.77
## 4 1 1 -1 -1 -1 5.82
## 5 -1 -1 1 -1 -1 9.17
## 6 1 -1 1 -1 -1 7.80
ymod<-lm(y~A*B*C*D*E, data = dat2)
coef(ymod)
## (Intercept) A B C D E
## 10.1803125 1.6159375 0.0434375 -0.0121875 2.9884375 2.1878125
## A:B A:C B:C A:D B:D C:D
## 1.2365625 -0.0015625 -0.1953125 1.6665625 -0.0134375 0.0034375
## A:E B:E C:E D:E A:B:C A:B:D
## 1.0271875 1.2834375 0.3015625 1.3896875 0.2503125 -0.3453125
## A:C:D B:C:D A:B:E A:C:E B:C:E A:D:E
## -0.0634375 0.3053125 1.1853125 -0.2590625 0.1709375 0.9015625
## B:D:E C:D:E A:B:C:D A:B:C:E A:B:D:E A:C:D:E
## -0.0396875 0.3959375 -0.0740625 -0.1846875 0.4071875 0.1278125
## B:C:D:E A:B:C:D:E
## -0.0746875 -0.3553125
halfnormal(ymod, col = "green", cex = -1, pch = 16)
##
## Significant effects (alpha=0.05, Lenth method):
## [1] D E A:D A D:E B:E A:B A:B:E A:E A:D:E
From the plot, factors A, D, E, AB, AD, AE, BE, DE, ABE, and ADE are significant model terms; factor B is also retained in the model below to preserve hierarchy.
dat2$AD<-A*D
dat2$AB<-A*B
dat2$DE<-D*E
dat2$BE<-B*E
AE<-A*E
ABE<-A*B*E
ADE<-A*D*E
#(b) Fit the model identified in part (a) and analyze the residuals. Is there any indication of model inadequacy?
ymodel<-aov(y~A+B+D+E+AB+AD+AE+BE+DE+ABE+ADE, data = dat2)
summary(ymodel)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 83.56 83.56 51.362 6.10e-07 ***
## B 1 0.06 0.06 0.037 0.849178
## D 1 285.78 285.78 175.664 2.30e-11 ***
## E 1 153.17 153.17 94.149 5.24e-09 ***
## AB 1 48.93 48.93 30.076 2.28e-05 ***
## AD 1 88.88 88.88 54.631 3.87e-07 ***
## AE 1 33.76 33.76 20.754 0.000192 ***
## BE 1 52.71 52.71 32.400 1.43e-05 ***
## DE 1 61.80 61.80 37.986 5.07e-06 ***
## ABE 1 44.96 44.96 27.635 3.82e-05 ***
## ADE 1 26.01 26.01 15.988 0.000706 ***
## Residuals 20 32.54 1.63
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.0500 indicate model terms are significant. In this case A, D, E, AB, AD, AE, BE, DE, ABE, ADE are significant model terms.
#b)Analyze the residuals from this experiment. Are there any indications of model inadequacy or violations of the assumptions?
plot(ymodel,(1:2))
There is nothing unusual about the plots. The normal probability plot is approximately a straight line, with a few extreme values in the tails of the distribution; otherwise the residuals look fairly normal.
#c) One of the factors from this experiment does not seem to be important. If you drop this factor, what type of design remains? Analyze the data using the full factorial model for only the four active factors. Compare your results with those obtained in part (a).
dat2$BD<-B*D
dat2$ABD<-A*B*D
dat2$BDE<-B*D*E
dat2$ABDE<-A*B*D*E
ymodel2<-aov(y~A+B+D+E+AB+AD+AE+BD+BE+DE+ABD+ABE+ADE+BDE+ABDE, data = dat2)
summary(ymodel2)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 83.56 83.56 57.233 1.14e-06 ***
## B 1 0.06 0.06 0.041 0.841418
## D 1 285.78 285.78 195.742 2.16e-10 ***
## E 1 153.17 153.17 104.910 1.97e-08 ***
## AB 1 48.93 48.93 33.514 2.77e-05 ***
## AD 1 88.88 88.88 60.875 7.66e-07 ***
## AE 1 33.76 33.76 23.126 0.000193 ***
## BD 1 0.01 0.01 0.004 0.950618
## BE 1 52.71 52.71 36.103 1.82e-05 ***
## DE 1 61.80 61.80 42.328 7.24e-06 ***
## ABD 1 3.82 3.82 2.613 0.125501
## ABE 1 44.96 44.96 30.794 4.40e-05 ***
## ADE 1 26.01 26.01 17.815 0.000650 ***
## BDE 1 0.05 0.05 0.035 0.854935
## ABDE 1 5.31 5.31 3.634 0.074735 .
## Residuals 16 23.36 1.46
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.05 indicate significant model terms. In this case A, D, E, AB, AD, AE, BE, DE, ABE, and ADE are significant model terms, the same terms found in part (a). Since factor C is inert, dropping it leaves two replicates of a full \(2^{4}\) factorial in A, B, D, and E.
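The same full factorial in the four active factors can also be specified more compactly (equivalent to ymodel2, since the manually constructed columns are just the products of the coded factors):
summary(aov(y~A*B*D*E, data = dat2))   # expands to the same 15 terms as ymodel2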
#d)Find settings of the active factors that maximize the predicted response.
library("FrF2")
a <- lm(y~A+D+B+1+AB+AD+AE+BD+BE+DE+ABD+ABE+ADE+BDE+ABDE,data = dat2)
cubePlot(a,"A","D","B",main = "y Cube plot experiment")
From the cube plot and the fact that all of the significant effect estimates are positive, the predicted response is maximized when factors A, B, D, and E are all set at their high (+1) levels; factor C is inactive and can be set to whichever level is most convenient.
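A minimal sketch confirming this numerically, assuming the reduced model from part (b) refit with formula interactions instead of the manually constructed columns:
grid<-expand.grid(A = c(-1, 1), B = c(-1, 1), D = c(-1, 1), E = c(-1, 1))
fit<-lm(y~A*B + A*D + A*E + B*E + D*E + A:B:E + A:D:E, data = dat2)   # same 11 terms as ymodel
grid$pred<-predict(fit, newdata = grid)
grid[which.max(grid$pred), ]   # the largest predicted response occurs at A = B = D = E = +1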
#6.8
time<-c(rep(1,12),rep(2,12))
culture<-c(rep(1:2, each = 6),rep(1:2, each = 6))
response1<-c(21,22,23,28,20,26,25,26,24,25,29,27,37,39,38,38,35,36,31,34,29,33,30,35)
#length(response)
data.frame(culture,time,response1)
culture<-as.factor(culture)
time<-as.factor(time)
m<-aov(response1~culture*time)
summary(m)
interaction.plot(culture,time,response1,fun="mean",type = "b", col = 5:7, main ="Interaction Plot", ylab = "Virus response", xlab = "Culture medium", trace.label = "Time", lwd = 3, lty = 1, ylim = c(20,40), pch = c(4,2))
#6.12
library(DoE.base)
A<-c(-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1)
B<-c(-1,-1,+1,+1,-1,-1,+1,+1,-1,-1,+1,+1,-1,-1,+1,+1)
response<-c(14.037,13.880,14.821,14.888,16.165,13.860,14.757,14.921,13.972,14.032,14.843,14.415,13.907,13.914,14.878,14.932)
flowrate<-c(rep(seq(1:2),8))
dtime<-c(rep(rep(1:2, each = 2),4))
A<-as.factor(A)
B<-as.factor(B)
flowrate<-as.factor(flowrate)
dtime<-as.factor(dtime)
Att<-data.frame(A,B,response)
#b) Conduct an analysis of variance. Which factors are important?
model<-aov(response~A*B)
summary(model)
#a) estimate the factor effects
SSA<-0.4026
SSB<-1.374
SSAB<-0.317
n<-2^2*4
effA<-(sqrt(SSA*n)*2)/n
effB<-(sqrt(SSB*n)*2)/n
effAB<-(sqrt(SSAB*n)*2)/n
effect<-c(effA,effB,effAB)
source<-c("A","B","AB")
data.frame(source,effect)
#c) Write down a regression equation that could be used to predict epitaxial layer thickness over the region of arsenic flow rate and deposition time used in this experiment.
f1<-lm(response~A*B, data = Att)
summary(f1)$coefficients
#coded factors
effect/2
#residuals
plot(model,(1:2))
#6.21
A<-c(rep(c(-1,1), 56))
B<-c(rep(c(-1,-1,+1,+1), 28))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),14))
D<-c(rep(c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1),7))
response<-c(10,0,4,0,0,5,6.5,16.5,4.5,19.5,15,41.5,8,21.5,0,18,
18,16.5,6,10,0,20.5,18.5,4.5,18,18,16,39,4.5,10.5,0,5,
14,4.5,1,34,18.5,18,7.5,0,14.5,16,8.5,6.5,6.5,6.5,0,7,
12.5,17.5,14.5,11,19.5,20,6,23.5,10,5.5,0,3.5,10,0,4.5,10,
19,20.5,12,25.5,16,29.5,0,8,0,10,0.5,7,13,15.5,1,32.5,
16,17.5,14,21.5,15,19,10,8,17.5,7,9,8.5,41,24,4,18.5,
18.5,33,5,0,11,10,0,8,6,36,3,36,14,16,6.5,8)
length(response)
dat<-data.frame(A,B,C,D,response)
dat[1:20,]
pmod<-lm(response~A*B*C*D, data = dat)
coef(pmod)
halfnormal(pmod, col = "blue", cex = -1, pch = 16)
golf<-aov(response~A*B*C*D, data = dat)
summary(golf)
golf2<-aov(response~A+B, data = dat)
summary(golf2)
plot(golf2,(1:2))
#log transformation
dat$nresponse<-log(dat$response+1)
golf3<-aov(nresponse~A+B, data = dat)
summary(golf3)
plot(golf3,(1:2))
#6.36
A<-c(rep(c(-1,1), 8))
B<-c(rep(c(-1,-1,+1,+1), 4))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),2))
D<-c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1)
resistivity<-c(1.92,11.28,1.09,5.75,2.13,9.53,1.03,5.35,1.60,11.73,1.16,4.68,2.16,9.11,1.07,5.30)
#(a) Estimate the factor effects. Plot the effect estimates on a normal probability plot and select a tentative model.
#?DoE.base
dat1<-data.frame(A,B,C,D,resistivity)
head(dat1)
library(DoE.base)
mod<-lm(resistivity~A*B*C*D, data = dat1)
coef(mod)
halfnormal(mod, col = "red", cex = -1, pch = 16)
dat1$AB<-A*B
dat1$ABC<-A*B*C
dat1
#(b) Fit the model identified in part (a) and analyze the residuals. Is there any indication of model inadequacy?
model1<-aov(resistivity~A+B+C+AB+ABC, data = dat1)
summary(model1)
model2<-aov(resistivity~A+B+AB, data = dat1)
summary(model2)
plot(model2)
#(c) Repeat the analysis from parts (a) and (b) using ln (y) as the response variable. Is there an indication that the transformation has been useful?
mod<-lm(log(resistivity)~A*B*C*D, data = dat1)
coef(mod)
halfnormal(mod, col = "red", cex = -1, pch = 16)
lnmodel<-aov(log(resistivity)~A+B, data = dat1)
summary(lnmodel)
plot(lnmodel,(1:2))
#(d) Fit a model in terms of the coded variables that can be used to predict the resistivity.
coef(mod)
intercept<-1.185
SSA<-0.812
SSB<-0.314
effect<-c(intercept,SSA,SSB)
source<-c("intercept","A","B")
data.frame(source,effect)
#6.39
A<-c(rep(c(-1,1), 16))
B<-c(rep(c(-1,-1,+1,+1), 8))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),4))
D<-c(rep(c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1),2))
E<-c(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1)
y<-c(8.11,5.56,5.77,5.82,9.17,7.8,3.23,5.69,8.82,14.23,9.2,8.94,8.68,
11.49,6.25,9.12,7.93,5,7.47,12,9.86,3.65,6.4,11.61,12.43,17.55,
8.87,25.38,13.06,18.85,11.78,26.05)
length(y)
#a)Analyze the data from this experiment. Identify the significant factors and interactions.
dat2<-data.frame(A,B,C,D,E,y)
head(dat2)
ymod<-lm(y~A*B*C*D*E, data = dat2)
coef(ymod)
halfnormal(ymod, col = "green", cex = -1, pch = 16)
dat2$AD<-A*D
dat2$AB<-A*B
dat2$DE<-D*E
dat2$BE<-B*E
AE<-A*E
ABE<-A*B*E
ADE<-A*D*E
#(b) Fit the model identified in part (a) and analyze the residuals. Is there any indication of model inadequacy?
ymodel<-aov(y~A+B+D+E+AB+AD+AE+BE+DE+ABE+ADE, data = dat2)
summary(ymodel)
plot(ymodel,(1:2))
#(c)
dat2$BD<-B*D
dat2$ABD<-A*B*D
dat2$BDE<-B*D*E
dat2$ABDE<-A*B*D*E
ymodel2<-aov(y~A+B+D+E+AB+AD+AE+BD+BE+DE+ABD+ABE+ADE+BDE+ABDE, data = dat2)
summary(ymodel2)