Ho: \(\alpha_{i} = 0\) for all \(i\) - Null Hypothesis
Ha: \(\alpha_{i} \ne 0\) for at least one \(i\) - Alternative Hypothesis
Ho: \(\beta_{j} = 0\) for all \(j\) - Null Hypothesis
Ha: \(\beta_{j} \ne 0\) for at least one \(j\) - Alternative Hypothesis
Ho: \(\alpha \beta_{ij} = 0\) for all \(i, j\) - Null Hypothesis
Ha: \(\alpha \beta_{ij} \ne 0\) for at least one \(i, j\) - Alternative Hypothesis
Model Equation
\(y_{ijk} = \mu + \alpha_{i} + \beta_j + \alpha \beta_{ij} + \epsilon_{ijk}\)
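Here \(\mu\) is the overall mean, \(\alpha_{i}\) is the effect of culture medium \(i\) (\(i = 1, 2\)), \(\beta_{j}\) is the effect of time \(j\) (\(j = 1, 2\)), \(\alpha \beta_{ij}\) is their interaction, and \(\epsilon_{ijk}\) is the random error for replicate \(k\) (\(k = 1, \dots, 6\)).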
time<-c(rep(1,12),rep(2,12))
culture<-c(rep(1:2, each = 6),rep(1:2, each = 6))
response1<-c(21,22,23,28,20,26,25,26,24,25,29,27,37,39,38,38,35,36,31,34,29,33,30,35)
#length(response)
data.frame(culture,time,response1)
## culture time response1
## 1 1 1 21
## 2 1 1 22
## 3 1 1 23
## 4 1 1 28
## 5 1 1 20
## 6 1 1 26
## 7 2 1 25
## 8 2 1 26
## 9 2 1 24
## 10 2 1 25
## 11 2 1 29
## 12 2 1 27
## 13 1 2 37
## 14 1 2 39
## 15 1 2 38
## 16 1 2 38
## 17 1 2 35
## 18 1 2 36
## 19 2 2 31
## 20 2 2 34
## 21 2 2 29
## 22 2 2 33
## 23 2 2 30
## 24 2 2 35
culture<-as.factor(culture)
time<-as.factor(time)
m<-aov(response1~culture*time)
summary(m)
## Df Sum Sq Mean Sq F value Pr(>F)
## culture 1 9.4 9.4 1.835 0.190617
## time 1 590.0 590.0 115.506 9.29e-10 ***
## culture:time 1 92.0 92.0 18.018 0.000397 ***
## Residuals 20 102.2 5.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.05 indicate significant model terms. In this case, time (B) and the culture-by-time interaction (AB) are significant model terms.
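The same p-values can be pulled out programmatically if needed (a minimal sketch; summary() on an aov object returns a list whose first element is the ANOVA table):
summary(m)[[1]][["Pr(>F)"]]   # p-values for culture, time and culture:time (NA for Residuals)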
plot(m)
There is nothing unusual about the plots. The normal probability plot is approximately a straight line, with one extreme value in the tail of the distribution; otherwise the residuals look fairly normal.
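If a formal check is wanted, a normality test on the residuals can supplement the plots (a sketch using base stats functions):
shapiro.test(residuals(m))   # Shapiro-Wilk test of normality of the residuals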
interaction.plot(culture,time,response1,fun="mean",type = "b", col = 5:7, main ="Interaction Plot", ylab = "Virus response", xlab = "Culture medium", trace.label = "Time", lwd = 3, lty = 1, ylim = c(20,40), pch = c(4,2))
From the plot, virus growth increases with time in both culture media, but the increase is much larger in culture medium 1 than in medium 2. At the shorter time, medium 2 gives slightly higher responses, while at the longer time medium 1 is clearly higher; the crossing traces reflect the significant culture-by-time interaction.
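The cell means behind the plot bear this out (a quick numerical check):
tapply(response1, list(culture = culture, time = time), mean)
# culture 1 rises from about 23.3 at time 1 to 37.2 at time 2; culture 2 rises only from 26.0 to 32.0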
Ho: \(\alpha_{i} = 0\) for all \(i\) - Null Hypothesis
Ha: \(\alpha_{i} \ne 0\) for at least one \(i\) - Alternative Hypothesis
Ho: \(\beta_{j} = 0\) for all \(j\) - Null Hypothesis
Ha: \(\beta_{j} \ne 0\) for at least one \(j\) - Alternative Hypothesis
Ho: \(\alpha \beta_{ij} = 0\) for all \(i, j\) - Null Hypothesis
Ha: \(\alpha \beta_{ij} \ne 0\) for at least one \(i, j\) - Alternative Hypothesis
Model Equation
\(y_{ijk} = \mu + \alpha_{i} + \beta_j + \alpha \beta_{ij} + \epsilon_{ijk}\)
#a) Estimate the factor effects.
library(DoE.base)
## Loading required package: grid
## Loading required package: conf.design
## Registered S3 method overwritten by 'DoE.base':
## method from
## factorize.factor conf.design
##
## Attaching package: 'DoE.base'
## The following objects are masked from 'package:stats':
##
## aov, lm
## The following object is masked from 'package:graphics':
##
## plot.design
## The following object is masked from 'package:base':
##
## lengths
A<-c(-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1)
B<-c(-1,-1,+1,+1,-1,-1,+1,+1,-1,-1,+1,+1,-1,-1,+1,+1)
response<-c(14.037,13.880,14.821,14.888,16.165,13.860,14.757,14.921,13.972,14.032,14.843,14.415,13.907,13.914,14.878,14.932)
flowrate<-c(rep(seq(1:2),8))
dtime<-c(rep(rep(1:2, each = 2),4))
A<-as.factor(A)
B<-as.factor(B)
flowrate<-as.factor(flowrate)
dtime<-as.factor(dtime)
Att<-data.frame(A,B,response)
#data.frame(response,flowrate,dtime)
#b) Conduct an analysis of variance. Which factors are important?
model<-aov(response~A*B)
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 0.403 0.4026 1.262 0.2833
## B 1 1.374 1.3736 4.305 0.0602 .
## A:B 1 0.317 0.3170 0.994 0.3386
## Residuals 12 3.828 0.3190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.05 indicate significant model terms. No factor is significant at the 5% level, although B is marginal (p = 0.060).
#a) estimate the factor effects
SSA<-0.4026   # sums of squares taken from the ANOVA table above
SSB<-1.374
SSAB<-0.317
n<-2^2*4      # total number of observations: 2^2 treatment combinations x 4 replicates
effA<-(sqrt(SSA*n)*2)/n   # |effect| = 2*sqrt(SS/n); this recovers the magnitude but not the sign
effB<-(sqrt(SSB*n)*2)/n
effAB<-(sqrt(SSAB*n)*2)/n
effect<-c(effA,effB,effAB)
source<-c("A","B","AB")
data.frame(source,effect)
## source effect
## 1 A 0.3172538
## 2 B 0.5860887
## 3 AB 0.2815138
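Because these magnitudes are recovered from sums of squares, the signs of the effects are lost. A quick sign-preserving check (a sketch using the +/-1 codings recovered from the factors):
Anum<-as.numeric(as.character(A))   # recover the +/-1 coding from the factor
Bnum<-as.numeric(as.character(B))
c(A = mean(response[Anum == 1]) - mean(response[Anum == -1]),
  B = mean(response[Bnum == 1]) - mean(response[Bnum == -1]),
  AB = mean(response[Anum*Bnum == 1]) - mean(response[Anum*Bnum == -1]))
# A ~ -0.317, B ~ +0.586, AB ~ +0.282
This shows the A effect is negative, which is consistent with the negative sign on the A term in the regression equation below.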
#c) Write down a regression equation that could be used to predict epitaxial layer thickness over the region of arsenic flow rate and deposition time used in this experiment.
f1<-lm(response~A*B, data = Att)
summary(f1)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.52025 0.2824181 51.4140118 1.925139e-15
## A1 -0.59875 0.3993996 -1.4991253 1.596813e-01
## B1 0.30450 0.3993996 0.7623944 4.605484e-01
## A1:B1 0.56300 0.5648363 0.9967490 3.385615e-01
#coded factors
effect/2
## [1] 0.1586269 0.2930444 0.1407569
\(y_{i} = \beta_{0} + \beta_{1}x_{i1} + \beta_{2}x_{i2} + \beta_{3}x_{i1}x_{i2} +\epsilon_{i}\)
\(Thickness = {14.5} - 0.16x_{i1} + 0.29x_{i2} + 0.14x_{i1}x_{i2}\)
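As a quick check of the fitted equation, the predicted thickness with both coded factors at their high levels is
\(\hat{y}(x_{i1}=+1, x_{i2}=+1) = 14.5 - 0.16 + 0.29 + 0.14 \approx 14.77\)
which is close to the observed average of about 14.79 for the four runs with A and B both high.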
#d) Analyze the residuals. Are there any residuals that should cause concern?
plot(model,(1:2))
The residuals do not look entirely normal: one apparent outlier stands out on both the normal probability plot and the residuals-versus-fitted plot.
#e) Discuss how you might deal with the potential outlier found in part (d).
One way to deal with the outlier is to replace it with the average of the remaining observations in its cell and refit the model. If the ANOVA results change substantially from those obtained with the original value, a log transformation of the response is another option to consider.
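A minimal sketch of the first option, assuming the outlier is the run with the largest standardized residual (run 5, 16.165, in this data):
out<-which.max(abs(rstandard(f1)))   # run with the largest standardized residual
samecell<-Att$A == Att$A[out] & Att$B == Att$B[out]   # runs in the same (A, B) cell
Att2<-Att
Att2$response[out]<-mean(Att$response[samecell & seq_len(nrow(Att)) != out])   # replace with the cell mean of the other runs
summary(aov(response~A*B, data = Att2))   # compare with the original ANOVA above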
#6.21
A<-c(rep(c(-1,1), 56))
B<-c(rep(c(-1,-1,+1,+1), 28))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),14))
D<-c(rep(c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1),7))
response<-c(10,0,4,0,0,5,6.5,16.5,4.5,19.5,15,41.5,8,21.5,0,18,
18,16.5,6,10,0,20.5,18.5,4.5,18,18,16,39,4.5,10.5,0,5,
14,4.5,1,34,18.5,18,7.5,0,14.5,16,8.5,6.5,6.5,6.5,0,7,
12.5,17.5,14.5,11,19.5,20,6,23.5,10,5.5,0,3.5,10,0,4.5,10,
19,20.5,12,25.5,16,29.5,0,8,0,10,0.5,7,13,15.5,1,32.5,
16,17.5,14,21.5,15,19,10,8,17.5,7,9,8.5,41,24,4,18.5,
18.5,33,5,0,11,10,0,8,6,36,3,36,14,16,6.5,8)
length(response)
## [1] 112
dat<-data.frame(A,B,C,D,response)
dat[1:20,]
## A B C D response
## 1 -1 -1 -1 -1 10.0
## 2 1 -1 -1 -1 0.0
## 3 -1 1 -1 -1 4.0
## 4 1 1 -1 -1 0.0
## 5 -1 -1 1 -1 0.0
## 6 1 -1 1 -1 5.0
## 7 -1 1 1 -1 6.5
## 8 1 1 1 -1 16.5
## 9 -1 -1 -1 1 4.5
## 10 1 -1 -1 1 19.5
## 11 -1 1 -1 1 15.0
## 12 1 1 -1 1 41.5
## 13 -1 -1 1 1 8.0
## 14 1 -1 1 1 21.5
## 15 -1 1 1 1 0.0
## 16 1 1 1 1 18.0
## 17 -1 -1 -1 -1 18.0
## 18 1 -1 -1 -1 16.5
## 19 -1 1 -1 -1 6.0
## 20 1 1 -1 -1 10.0
pmod<-lm(response~A*B*C*D, data = dat)
coef(pmod)
## (Intercept) A B C D A:B
## 12.2991071 2.8616071 -1.8616071 -1.1383929 -0.1116071 1.3973214
## A:C B:C A:D B:D C:D A:B:C
## -0.3258929 -1.0133929 0.9151786 0.7098214 -0.1205357 -0.2544643
## A:B:D A:C:D B:C:D A:B:C:D
## 1.0044643 -0.5937500 -0.5491071 0.9241071
#a)Analyze the data from this experiment. Which factors significantly affect putting performance?
halfnormal(pmod, col = "blue", cex = -1, pch = 16)
## Warning in halfnormal.lm(pmod, col = "blue", cex = -1, pch = 16): halfnormal not
## recommended for models with more residual df than model df
##
## Significant effects (alpha=0.05, Lenth method):
## [1] A
From the half-normal plot, factor A appears to be the only effect labelled as significant; however, since the points are fairly clustered, we also run an ANOVA to confirm.
golf<-aov(response~A*B*C*D, data = dat)
summary(golf)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 917 917.1 10.588 0.00157 **
## B 1 388 388.1 4.481 0.03686 *
## C 1 145 145.1 1.676 0.19862
## D 1 1 1.4 0.016 0.89928
## A:B 1 219 218.7 2.525 0.11538
## A:C 1 12 11.9 0.137 0.71178
## B:C 1 115 115.0 1.328 0.25205
## A:D 1 94 93.8 1.083 0.30066
## B:D 1 56 56.4 0.651 0.42159
## C:D 1 2 1.6 0.019 0.89127
## A:B:C 1 7 7.3 0.084 0.77294
## A:B:D 1 113 113.0 1.305 0.25623
## A:C:D 1 39 39.5 0.456 0.50121
## B:C:D 1 34 33.8 0.390 0.53386
## A:B:C:D 1 96 95.6 1.104 0.29599
## Residuals 96 8316 86.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.0500 indicate model terms are significant. In this case A and B are significant model terms.
New model with factors A and B only
golf2<-aov(response~A+B, data = dat)
summary(golf2)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 917 917.1 10.809 0.00136 **
## B 1 388 388.1 4.574 0.03469 *
## Residuals 109 9249 84.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.05 indicate significant model terms. In this case, our assumption was correct: factors A and B are the only significant model terms.
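As an additional check, the terms dropped from the full factorial can be tested jointly with a partial F-test (a sketch; aov objects inherit from lm, so anova() can compare the nested fits):
anova(golf2, golf)   # joint test of all interaction terms dropped from the full model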
#(b) Analyze the residuals from this experiment. Are there any indications of model inadequacy?
plot(golf2,(1:2))
The residuals do not look normal: an outlier shows up on both the normal probability plot and the residuals-versus-fitted plot, and the extreme values on the normal probability plot do not fall on a straight line. A log transformation of the observations can help correct this.
#log transformation
dat$nresponse<-log(dat$response+1)
golf3<-aov(nresponse~A+B, data = dat)
summary(golf3)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 7.37 7.373 7.603 0.00683 **
## B 1 6.98 6.976 7.193 0.00846 **
## Residuals 109 105.71 0.970
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(golf3,(1:2))
#sqrt transformation
#dat$sreponse<-sqrt(dat$response)
#golf4<-aov(nresponse~A+B, data = dat)
#summary(golf4)
#plot(golf4,(1:2))
interaction.plot(A,B,response,fun="mean",type = "b", col = 5:7, main ="Interaction Plot", ylab = "distance", xlab = "length of putt", trace.label = "Type of putter", lwd = 3, lty = 1, pch = c(4,2))
#6.36
A<-c(rep(c(-1,1), 8))
B<-c(rep(c(-1,-1,+1,+1), 4))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),2))
D<-c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1)
resistivity<-c(1.92,11.28,1.09,5.75,2.13,9.53,1.03,5.35,1.60,11.73,1.16,4.68,2.16,9.11,1.07,5.30)
#(a) Estimate the factor effects. Plot the effect estimates on a normal probability plot and select a tentative model.
#?DoE.base
dat1<-data.frame(A,B,C,D,resistivity)
head(dat1)
## A B C D resistivity
## 1 -1 -1 -1 -1 1.92
## 2 1 -1 -1 -1 11.28
## 3 -1 1 -1 -1 1.09
## 4 1 1 -1 -1 5.75
## 5 -1 -1 1 -1 2.13
## 6 1 -1 1 -1 9.53
library(DoE.base)
mod<-lm(resistivity~A*B*C*D, data = dat1)
coef(mod)
## (Intercept) A B C D A:B
## 4.680625 3.160625 -1.501875 -0.220625 -0.079375 -1.069375
## A:C B:C A:D B:D C:D A:B:C
## -0.298125 0.229375 -0.056875 -0.046875 0.029375 0.344375
## A:B:D A:C:D B:C:D A:B:C:D
## -0.096875 -0.010625 0.094375 0.141875
halfnormal(mod, col = "red", cex = -1, pch = 16)
##
## Significant effects (alpha=0.05, Lenth method):
## [1] A B A:B A:B:C
From the plot, factors A, B, and AB are significant model terms (ABC is also labelled, but it is dropped from the tentative model), so we proceed with A, B, and AB.
dat1$AB<-A*B
#dat1$ABC<-A*B*C
dat1
## A B C D resistivity AB
## 1 -1 -1 -1 -1 1.92 1
## 2 1 -1 -1 -1 11.28 -1
## 3 -1 1 -1 -1 1.09 -1
## 4 1 1 -1 -1 5.75 1
## 5 -1 -1 1 -1 2.13 1
## 6 1 -1 1 -1 9.53 -1
## 7 -1 1 1 -1 1.03 -1
## 8 1 1 1 -1 5.35 1
## 9 -1 -1 -1 1 1.60 1
## 10 1 -1 -1 1 11.73 -1
## 11 -1 1 -1 1 1.16 -1
## 12 1 1 -1 1 4.68 1
## 13 -1 -1 1 1 2.16 1
## 14 1 -1 1 1 9.11 -1
## 15 -1 1 1 1 1.07 -1
## 16 1 1 1 1 5.30 1
model2<-aov(resistivity~A+B+AB, data = dat1)
summary(model2)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 159.83 159.83 333.09 4.05e-10 ***
## B 1 36.09 36.09 75.21 1.63e-06 ***
## AB 1 18.30 18.30 38.13 4.76e-05 ***
## Residuals 12 5.76 0.48
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.0500 indicate model terms are significant. In this case all the factors in the model are significant model terms.
plot(model2,(1:2))
The residuals do not look normal: outliers show up on both the normal probability plot and the residuals-versus-fitted plot, and the normal probability plot does not fall on a straight line. A log transformation of the observations can help correct this.
#(c) Repeat the analysis from parts (a) and (b) using ln (y) as the response variable. Is there an indication that the transformation has been useful?
mod<-lm(log(resistivity)~A*B*C*D, data = dat1)
coef(mod)
## (Intercept) A B C D A:B
## 1.185417116 0.812870345 -0.314277554 -0.006408558 -0.018077390 -0.024684570
## A:C B:C A:D B:D C:D A:B:C
## -0.039723700 -0.004225796 -0.009578245 0.003708723 0.017780432 0.063434408
## A:B:D A:C:D B:C:D A:B:C:D
## -0.029875960 -0.003740235 0.003765760 0.031322043
halfnormal(mod, col = "red", cex = -1, pch = 16)
##
## Significant effects (alpha=0.05, Lenth method):
## [1] A B A:B:C
lnmodel<-aov(log(resistivity)~A+B, data = dat1)
summary(lnmodel)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 10.572 10.572 962.9 1.41e-13 ***
## B 1 1.580 1.580 143.9 2.09e-08 ***
## Residuals 13 0.143 0.011
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(lnmodel,(1:2))
#(d) Fit a model in terms of the coded variables that can be used to predict the resistivity.
coef(mod)
## (Intercept) A B C D A:B
## 1.185417116 0.812870345 -0.314277554 -0.006408558 -0.018077390 -0.024684570
## A:C B:C A:D B:D C:D A:B:C
## -0.039723700 -0.004225796 -0.009578245 0.003708723 0.017780432 0.063434408
## A:B:D A:C:D B:C:D A:B:C:D
## -0.029875960 -0.003740235 0.003765760 0.031322043
# note: the values below are the coded regression coefficients from the log-scale model (the names SSA and SSB are reused from earlier; they are not sums of squares)
intercept<-1.185
SSA<-0.812
SSB<-0.314
effect<-c(intercept,SSA,SSB)
source<-c("intercept","A","B")
data.frame(source,effect)
## source effect
## 1 intercept 1.185
## 2 A 0.812
## 3 B 0.314
\(y_{i} = \beta_{0} + \beta_{1}x_{i1} + \beta_{2}x_{i2} +\epsilon_{i}\)
\(\ln(Resistivity) = 1.185 + 0.812x_{i1} - 0.314x_{i2}\)
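Because this model is on the log scale, predictions must be exponentiated to return to the resistivity scale. A minimal sketch (the helper name predict_resistivity is illustrative, not part of the original analysis):
predict_resistivity<-function(x1, x2) exp(1.185 + 0.812*x1 - 0.314*x2)
predict_resistivity(+1, -1)   # about 10.1, close to the observed cell average of roughly 10.4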
#6.39
A<-c(rep(c(-1,1), 16))
B<-c(rep(c(-1,-1,+1,+1), 8))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),4))
D<-c(rep(c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1),2))
E<-c(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1)
y<-c(8.11,5.56,5.77,5.82,9.17,7.8,3.23,5.69,8.82,14.23,9.2,8.94,8.68,
11.49,6.25,9.12,7.93,5,7.47,12,9.86,3.65,6.4,11.61,12.43,17.55,
8.87,25.38,13.06,18.85,11.78,26.05)
length(y)
## [1] 32
#a)Analyze the data from this experiment. Identify the significant factors and interactions.
dat2<-data.frame(A,B,C,D,E,y)
head(dat2)
## A B C D E y
## 1 -1 -1 -1 -1 -1 8.11
## 2 1 -1 -1 -1 -1 5.56
## 3 -1 1 -1 -1 -1 5.77
## 4 1 1 -1 -1 -1 5.82
## 5 -1 -1 1 -1 -1 9.17
## 6 1 -1 1 -1 -1 7.80
ymod<-lm(y~A*B*C*D*E, data = dat2)
coef(ymod)
## (Intercept) A B C D E
## 10.1803125 1.6159375 0.0434375 -0.0121875 2.9884375 2.1878125
## A:B A:C B:C A:D B:D C:D
## 1.2365625 -0.0015625 -0.1953125 1.6665625 -0.0134375 0.0034375
## A:E B:E C:E D:E A:B:C A:B:D
## 1.0271875 1.2834375 0.3015625 1.3896875 0.2503125 -0.3453125
## A:C:D B:C:D A:B:E A:C:E B:C:E A:D:E
## -0.0634375 0.3053125 1.1853125 -0.2590625 0.1709375 0.9015625
## B:D:E C:D:E A:B:C:D A:B:C:E A:B:D:E A:C:D:E
## -0.0396875 0.3959375 -0.0740625 -0.1846875 0.4071875 0.1278125
## B:C:D:E A:B:C:D:E
## -0.0746875 -0.3553125
halfnormal(ymod, col = "green", cex = -1, pch = 16)
##
## Significant effects (alpha=0.05, Lenth method):
## [1] D E A:D A D:E B:E A:B A:B:E A:E A:D:E
From the plot, factors A, D, E, AB, AD, AE, BE, DE, ABE, and ADE are significant model terms; factor B is also retained in the model below to preserve hierarchy.
dat2$AD<-A*D
dat2$AB<-A*B
dat2$DE<-D*E
dat2$BE<-B*E
AE<-A*E
ABE<-A*B*E
ADE<-A*D*E
#(b) Fit the model identified in part (a) and analyze the residuals. Is there any indication of model inadequacy?
ymodel<-aov(y~A+B+D+E+AB+AD+AE+BE+DE+ABE+ADE, data = dat2)
summary(ymodel)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 83.56 83.56 51.362 6.10e-07 ***
## B 1 0.06 0.06 0.037 0.849178
## D 1 285.78 285.78 175.664 2.30e-11 ***
## E 1 153.17 153.17 94.149 5.24e-09 ***
## AB 1 48.93 48.93 30.076 2.28e-05 ***
## AD 1 88.88 88.88 54.631 3.87e-07 ***
## AE 1 33.76 33.76 20.754 0.000192 ***
## BE 1 52.71 52.71 32.400 1.43e-05 ***
## DE 1 61.80 61.80 37.986 5.07e-06 ***
## ABE 1 44.96 44.96 27.635 3.82e-05 ***
## ADE 1 26.01 26.01 15.988 0.000706 ***
## Residuals 20 32.54 1.63
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.0500 indicate model terms are significant. In this case A, D, E, AB, AD, AE, BE, DE, ABE, ADE are significant model terms.
#b)Analyze the residuals from this experiment. Are there any indications of model inadequacy or violations of the assumptions?
plot(ymodel,(1:2))
There is nothing unusual about the plots. The normal probability plot is approximately a straight line, with a few extreme values in the tails of the distribution; otherwise the residuals look fairly normal.
#c) One of the factors from this experiment does not seem to be important. If you drop this factor, what type of design remains? Analyze the data using the full factorial model for only the four active factors. Compare your results with those obtained in part (a).
dat2$BD<-B*D
dat2$ABD<-A*B*D
dat2$BDE<-B*D*E
dat2$ABDE<-A*B*D*E
ymodel2<-aov(y~A+B+D+E+AB+AD+AE+BD+BE+DE+ABD+ABE+ADE+BDE+ABDE, data = dat2)
summary(ymodel2)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 83.56 83.56 57.233 1.14e-06 ***
## B 1 0.06 0.06 0.041 0.841418
## D 1 285.78 285.78 195.742 2.16e-10 ***
## E 1 153.17 153.17 104.910 1.97e-08 ***
## AB 1 48.93 48.93 33.514 2.77e-05 ***
## AD 1 88.88 88.88 60.875 7.66e-07 ***
## AE 1 33.76 33.76 23.126 0.000193 ***
## BD 1 0.01 0.01 0.004 0.950618
## BE 1 52.71 52.71 36.103 1.82e-05 ***
## DE 1 61.80 61.80 42.328 7.24e-06 ***
## ABD 1 3.82 3.82 2.613 0.125501
## ABE 1 44.96 44.96 30.794 4.40e-05 ***
## ADE 1 26.01 26.01 17.815 0.000650 ***
## BDE 1 0.05 0.05 0.035 0.854935
## ABDE 1 5.31 5.31 3.634 0.074735 .
## Residuals 16 23.36 1.46
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From the result, values of “Prob > F” less than 0.05 indicate significant model terms. In this case A, D, E, AB, AD, AE, BE, DE, ABE, and ADE are significant model terms, the same terms found in part (a). Since factor C is inert, dropping it leaves two replicates of a full \(2^{4}\) factorial in A, B, D, and E.
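The same full factorial in the four active factors can also be specified more compactly (equivalent to ymodel2, since the manually constructed columns are just the products of the coded factors):
summary(aov(y~A*B*D*E, data = dat2))   # expands to the same 15 terms as ymodel2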
#d)Find settings of the active factors that maximize the predicted response.
library("FrF2")
a <- lm(y~A+D+B+1+AB+AD+AE+BD+BE+DE+ABD+ABE+ADE+BDE+ABDE,data = dat2)
cubePlot(a,"A","D","B",main = "y Cube plot experiment")
From the cube plot and the fact that all of the significant effect estimates are positive, the predicted response is maximized when factors A, B, D, and E are all set at their high (+1) levels; factor C is inactive and can be set to whichever level is most convenient.
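A minimal sketch confirming this numerically, assuming the reduced model from part (b) refit with formula interactions instead of the manually constructed columns:
grid<-expand.grid(A = c(-1, 1), B = c(-1, 1), D = c(-1, 1), E = c(-1, 1))
fit<-lm(y~A*B + A*D + A*E + B*E + D*E + A:B:E + A:D:E, data = dat2)   # same 11 terms as ymodel
grid$pred<-predict(fit, newdata = grid)
grid[which.max(grid$pred), ]   # the largest predicted response occurs at A = B = D = E = +1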
#6.8
time<-c(rep(1,12),rep(2,12))
culture<-c(rep(1:2, each = 6),rep(1:2, each = 6))
response1<-c(21,22,23,28,20,26,25,26,24,25,29,27,37,39,38,38,35,36,31,34,29,33,30,35)
#length(response)
data.frame(culture,time,response1)
culture<-as.factor(culture)
time<-as.factor(time)
m<-aov(response1~culture*time)
summary(m)
interaction.plot(culture,time,response1,fun="mean",type = "b", col = 5:7, main ="Interaction Plot", ylab = "Virus response", xlab = "Culture medium", trace.label = "Time", lwd = 3, lty = 1, ylim = c(20,40), pch = c(4,2))
#6.12
library(DoE.base)
A<-c(-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1,-1,+1)
B<-c(-1,-1,+1,+1,-1,-1,+1,+1,-1,-1,+1,+1,-1,-1,+1,+1)
response<-c(14.037,13.880,14.821,14.888,16.165,13.860,14.757,14.921,13.972,14.032,14.843,14.415,13.907,13.914,14.878,14.932)
flowrate<-c(rep(seq(1:2),8))
dtime<-c(rep(rep(1:2, each = 2),4))
A<-as.factor(A)
B<-as.factor(B)
flowrate<-as.factor(flowrate)
dtime<-as.factor(dtime)
Att<-data.frame(A,B,response)
#b) Conduct an analysis of variance. Which factors are important?
model<-aov(response~A*B)
summary(model)
#a) estimate the factor effects
SSA<-0.4026
SSB<-1.374
SSAB<-0.317
n<-2^2*4
effA<-(sqrt(SSA*n)*2)/n
effB<-(sqrt(SSB*n)*2)/n
effAB<-(sqrt(SSAB*n)*2)/n
effect<-c(effA,effB,effAB)
source<-c("A","B","AB")
data.frame(source,effect)
#c) Write down a regression equation that could be used to predict epitaxial layer thickness over the region of arsenic flow rate and deposition time used in this experiment.
f1<-lm(response~A*B, data = Att)
summary(f1)$coefficients
#coded factors
effect/2
#residuals
plot(model,(1:2))
#6.21
A<-c(rep(c(-1,1), 56))
B<-c(rep(c(-1,-1,+1,+1), 28))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),14))
D<-c(rep(c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1),7))
response<-c(10,0,4,0,0,5,6.5,16.5,4.5,19.5,15,41.5,8,21.5,0,18,
18,16.5,6,10,0,20.5,18.5,4.5,18,18,16,39,4.5,10.5,0,5,
14,4.5,1,34,18.5,18,7.5,0,14.5,16,8.5,6.5,6.5,6.5,0,7,
12.5,17.5,14.5,11,19.5,20,6,23.5,10,5.5,0,3.5,10,0,4.5,10,
19,20.5,12,25.5,16,29.5,0,8,0,10,0.5,7,13,15.5,1,32.5,
16,17.5,14,21.5,15,19,10,8,17.5,7,9,8.5,41,24,4,18.5,
18.5,33,5,0,11,10,0,8,6,36,3,36,14,16,6.5,8)
length(response)
dat<-data.frame(A,B,C,D,response)
dat[1:20,]
pmod<-lm(response~A*B*C*D, data = dat)
coef(pmod)
halfnormal(pmod, col = "blue", cex = -1, pch = 16)
golf<-aov(response~A*B*C*D, data = dat)
summary(golf)
golf2<-aov(response~A+B, data = dat)
summary(golf2)
plot(golf2,(1:2))
#log transformation
dat$nresponse<-log(dat$response+1)
golf3<-aov(nresponse~A+B, data = dat)
summary(golf3)
plot(golf3,(1:2))
#6.36
A<-c(rep(c(-1,1), 8))
B<-c(rep(c(-1,-1,+1,+1), 4))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),2))
D<-c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1)
resistivity<-c(1.92,11.28,1.09,5.75,2.13,9.53,1.03,5.35,1.60,11.73,1.16,4.68,2.16,9.11,1.07,5.30)
#(a) Estimate the factor effects. Plot the effect estimates on a normal probability plot and select a tentative model.
#?DoE.base
dat1<-data.frame(A,B,C,D,resistivity)
head(dat1)
library(DoE.base)
mod<-lm(resistivity~A*B*C*D, data = dat1)
coef(mod)
halfnormal(mod, col = "red", cex = -1, pch = 16)
dat1$AB<-A*B
dat1$ABC<-A*B*C
dat1
#(b) Fit the model identified in part (a) and analyze the residuals. Is there any indication of model inadequacy?
model1<-aov(resistivity~A+B+C+AB+ABC, data = dat1)
summary(model1)
model2<-aov(resistivity~A+B+AB, data = dat1)
summary(model2)
plot(model2)
#(c) Repeat the analysis from parts (a) and (b) using ln (y) as the response variable. Is there an indication that the transformation has been useful?
mod<-lm(log(resistivity)~A*B*C*D, data = dat1)
coef(mod)
halfnormal(mod, col = "red", cex = -1, pch = 16)
lnmodel<-aov(log(resistivity)~A+B, data = dat1)
summary(lnmodel)
plot(lnmodel,(1:2))
#(d) Fit a model in terms of the coded variables that can be used to predict the resistivity.
coef(mod)
intercept<-1.185
SSA<-0.812
SSB<-0.314
effect<-c(intercept,SSA,SSB)
source<-c("intercept","A","B")
data.frame(source,effect)
#6.39
A<-c(rep(c(-1,1), 16))
B<-c(rep(c(-1,-1,+1,+1), 8))
C<-c(rep(c(-1,-1,-1,-1,+1,+1,+1,+1),4))
D<-c(rep(c(-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1),2))
E<-c(-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1,+1)
y<-c(8.11,5.56,5.77,5.82,9.17,7.8,3.23,5.69,8.82,14.23,9.2,8.94,8.68,
11.49,6.25,9.12,7.93,5,7.47,12,9.86,3.65,6.4,11.61,12.43,17.55,
8.87,25.38,13.06,18.85,11.78,26.05)
length(y)
#a)Analyze the data from this experiment. Identify the significant factors and interactions.
dat2<-data.frame(A,B,C,D,E,y)
head(dat2)
ymod<-lm(y~A*B*C*D*E, data = dat2)
coef(ymod)
halfnormal(ymod, col = "green", cex = -1, pch = 16)
dat2$AD<-A*D
dat2$AB<-A*B
dat2$DE<-D*E
dat2$BE<-B*E
AE<-A*E
ABE<-A*B*E
ADE<-A*D*E
#(b) Fit the model identified in part (a) and analyze the residuals. Is there any indication of model inadequacy?
ymodel<-aov(y~A+B+D+E+AB+AD+AE+BE+DE+ABE+ADE, data = dat2)
summary(ymodel)
plot(ymodel,(1:2))
#(c)
dat2$BD<-B*D
dat2$ABD<-A*B*D
dat2$BDE<-B*D*E
dat2$ABDE<-A*B*D*E
ymodel2<-aov(y~A+B+D+E+AB+AD+AE+BD+BE+DE+ABD+ABE+ADE+BDE+ABDE, data = dat2)
summary(ymodel2)