1a
A suitable design for this data is the Latin Square Design
1b
#suitable model \(Y_{ijk}\)=\(\mu\)+\(\alpha_i\)+\(\beta_j\)+\(\tau_k\)+\(\epsilon_{ijk}\)
#model definition
\(\mu\)= mean of effects of the 5 different ingredients
\(\alpha_i\)=ith batch of new material
\(\beta_j\)=jth day
\(\tau_k\)=effect of kth different ingredients (A,B,C,D,E)
##We assume that for the Latin Square Design, our three factors (ingredients,days,batch) do not interact.
1c
batch <- c(rep("batch1",1), rep("batch2",1), rep("batch3",1), rep("batch4",1), rep("batch5",1))
day <- c(rep("day1",5), rep("day2",5), rep("day3",5), rep("day4",5), rep("day5",5))
ingredients <- c("A","C","B","D","E","B","E","A","C","D","D","A","C","E","B","C","D","E","B","A","E","B","D","A","C")
freq <- c(8,11,4,6,4,7,2,9,8,2,1,7,10,6,3,7,3,1,6,8,3,8,5,10,8)
mydata <- data.frame(batch,day,ingredients,freq)
mydata
matrix(mydata$ingredients,5,5)
## [,1] [,2] [,3] [,4] [,5]
## [1,] "A" "B" "D" "C" "E"
## [2,] "C" "E" "A" "D" "B"
## [3,] "B" "A" "C" "E" "D"
## [4,] "D" "C" "E" "B" "A"
## [5,] "E" "D" "B" "A" "C"
f.mat <- matrix(mydata$freq,5,5)
f.mat
## [,1] [,2] [,3] [,4] [,5]
## [1,] 8 7 1 7 3
## [2,] 11 2 7 3 8
## [3,] 4 9 10 1 5
## [4,] 6 8 6 6 10
## [5,] 4 2 3 8 8
Y... <- mean(f.mat)
#Yi..
Yi.. <- apply(f.mat,1,sum)
Yi..
## [1] 26 31 29 36 25
ai <- Yi../5
ai
## [1] 5.2 6.2 5.8 7.2 5.0
a1 <- 5.2-Y...
a1
## [1] -0.68
a2 <- 6.2-Y...
a2
## [1] 0.32
a3 <- 5.8-Y...
a3
## [1] -0.08
a4 <- 7.2-Y...
a4
## [1] 1.32
a5 <- 5-Y...
a5
## [1] -0.88
#Y.j.
Y.j. <- apply(f.mat, 2,sum)
Y.j.
## [1] 33 28 27 25 34
Bi <- Y.j./5
Bi
## [1] 6.6 5.6 5.4 5.0 6.8
b1 <- 6.6-Y...
b1
## [1] 0.72
b2 <- 5.6-Y...
b2
## [1] -0.28
b3 <- 5.4-Y...
b3
## [1] -0.48
b4 <- 5-Y...
b4
## [1] -0.88
b5 <- 6.8-Y...
b5
## [1] 0.92
y..1 <- (8+9+7+8+10)/5
y..2 <- (7+4+3+6+8)/5
y..3 <- (11+8+10+7+8)/5
y..4 <- (6+2+1+3+5)/5
y..5 <- (4+2+6+1+3)/5
t1 <- 8.4-Y...
t1
## [1] 2.52
t2 <- 5.6-Y...
t2
## [1] -0.28
t3 <- 8.8-Y...
t3
## [1] 2.92
t4 <- 3.4-Y...
t4
## [1] -2.48
t5 <- 3.2-Y...
t5
## [1] -2.68
The parameter \(\hat{\mu}\) is 5.88, \(\alpha_1\) is-0.68, \(\alpha_2\) is 0.32, \(\alpha_3\) is -0.08, \(\alpha_4\) is 1.32, \(\alpha_5\) is -0.88, \(\beta_1\) is 0.72, \(\beta_2\) is -0.28, \(\beta_3\) is -0.48, \(\beta_4\) is -0.88, \(\beta_5\) is 0.92, \(\tau_1\) is 2.52, \(\tau_2\) is -0.28, \(\tau_3\) is 2.92, \(\tau_4\) is -2.48, \(\tau_5\) is -2.68, and \(e_{ijk}\) is 178.96.
1d
myfit <- lm(freq~batch+day+ingredients, mydata)
anova(myfit)
1e
anova(myfit)
\(H_{0A}\):\(\alpha_i\)=0 \(H_{0B}\):\(\beta_j\)=0 \(H_{0T}\):\(\tau_k\)=0 From the p-value of batch and day which is less than \(\alpha\)=0.05, we fail to reject the null hypotheses, therefore, the number of runs made in a day and the different batches of material are not significant. Since the p-value of ingredients is less than 0.05, we reject the null and conclude that there is significant effect of ingredients.
2a
library(readr)
memory <- read_table2("/Users/beckysu/Desktop/memory.txt")
## Parsed with column specification:
## cols(
## wordtype = col_integer(),
## distract = col_integer(),
## trtmt = col_integer(),
## y = col_integer(),
## z = col_double()
## )
#linearity
memory.lm <- lm(y~trtmt, data=memory)
m.resid <- memory.lm$residuals
m.fitted <- memory.lm$fitted.values
plot(m.resid~m.fitted, xlab="Fitted Values", ylab="Residuals", main="Residuals vs Fits Plot")
plot(memory.lm, which=1)
#equal variance
memory.lm <- lm(y~trtmt, data=memory)
m.resid <- memory.lm$residuals
m.fitted <- memory.lm$fitted.values
plot(m.resid~m.fitted, xlab="Fitted Values", ylab="Residuals", main="Residuals vs Fits Plot")
abline(h=0, lty=2)
#normality
yhat <- fitted(memory.lm)
e <- trtmt~yhat
plot(memory.lm, which=2)
2b
#variance stabilizing transformation
memory.aov <- aov(y~trtmt,data=memory)
yij <- tapply(memory$y, memory$trtmt, mean)
sij <- tapply(memory$y, memory$trtmt, var)
plot(log(yij),log(sij))
lin <- lm(log(sij)~log(yij))
lin
##
## Call:
## lm(formula = log(sij) ~ log(yij))
##
## Coefficients:
## (Intercept) log(yij)
## -6.993 3.372
abline(a=lin$coefficients[1],b=lin$coefficients[2])
q <- lin$coefficients[2]
q
## log(yij)
## 3.372173
#recheck assumptions
y.new <- (memory$y)^(1-q/2)
aov.new <- aov(y.new~memory$trtmt)
plot(aov.new$fitted.values,aov.new$residuals, xlab="Fitted Values",ylab = "Residuals")
3a
A <- c("-","+","-","+","-","+","-","+","-","+","-","+","-","+","-","+")
B <- c("-","-","+","+","-","-","+","+","-","-","+","+","-","-","+","+")
AB <- c("+","-","-","+","+","-","-","+","+","-","-","+","+","-","-","+")
C <- c("-","-","-","-","+","+","+","+","-","-","-","-","+","+","+","+")
AC <- c("+","-","+","-","-","+","-","+","+","-","+","-","-","+","-","+")
BC <- c("+","+","-","-","-","-","+","+","+","+","-","-","-","-","+","+")
ABC <- c("-","+","+","-","+","-","-","+","-","+","+","-","+","-","-","+")
D <- c("-","-","-","-","-","-","-","-","+","+","+","+","+","+","+","+")
AD <- c("+","-","+","-","+","-","+","-","-","+","-","+","-","+","-","+")
BD <- c("+","+","-","-","+","+","-","-","-","-","+","+","-","-","+","+")
ABD <- c("-","+","+","-","-","+","+","-","+","-","-","+","+","-","-","+")
CD <- c("+","+","+","+","-","-","-","-","-","-","-","-","+","+","+","+")
ACD <- c("-","+","-","+","-","+","+","-","+","-","-","+","+","-","-","+")
BCD <- c("-","-","+","+","+","+","-","-","+","+","-","-","-","-","+","+")
ABCD <- c("+","-","-","+","-","+","+","-","-","+","+","-","+","-","-","+")
mydata2 <- data.frame(A,B,AB,C,AC,BC,ABC,D,AD,BD,ABD,CD,ACD,BCD,ABCD)
mydata2
3b
A <- c("-","+","-","+","-","+","-","+","-","+","-","+","-","+","-","+")
B <- c("-","-","+","+","-","-","+","+","-","-","+","+","-","-","+","+")
C <- c("-","-","-","-","+","+","+","+","-","-","-","-","+","+","+","+")
D <- c("-","-","-","-","-","-","-","-","+","+","+","+","+","+","+","+")
TreatmentComb <- c("(1)","a","b","ab","c","ac","bc","abc","d","ad","bd","abd","cd","acd","bcd","abcd")
I <- c(7.037,14.707,11.635,17.273,10.403,4.368,9.360,13.440,8.561,16.867,13.876,19.824,11.846,6.125,11.190,15.653)
II <- c(6.376,15.219,12.089,17.815,10.151,4.098,9.253,12.923,8.951,17.052,13.658,19.639,12.337,5.904,10.935,15.053)
y <- c(7.037,14.707,11.635,17.273,10.403,4.368,9.360,13.440,8.561,16.867,13.876,19.824,11.846,6.125,11.190,15.653,6.376,15.219,12.089,17.815,10.151,4.098,9.253,12.923,8.951,17.052,13.658,19.639,12.337,5.904,10.935,15.053)
Total <- I+II
Block <- c(rep(1,16), rep(2,16))
trtdata <- data.frame(A,B,C,D,TreatmentComb,y,Block)
trtdata
#total factor effects [X]
TreatmentComb <- c("(1)","a","b","ab","c","ac","bc","abc","d","ad","bd","abd","cd","acd","bcd","abcd")
tfA <- (-13.413+29.926-23.724+35.088-20.554+8.466-18.613+26.363-17.512+33.919-27.534+39.463-24.183+12.029-22.125+30.706)
tfB <- (-13.413-29.926+23.724+35.088-20.554-8.466+18.613+26.363-17.512-33.919+27.534+39.463-24.183-12.029+22.125+30.706)
tfAB <- (13.413-29.926-23.724+35.088+20.554-8.466-18.613+26.363+17.512-33.919-27.534+39.463+24.183-12.029-22.125+30.706)
tfC <- (-13.413-29.926-23.724-35.088+20.554+8.466+18.613+26.363-17.512-33.919-27.534-39.463+24.183+12.029+22.125+30.706)
tfAC <- (13.413-29.926+23.724-35.088-20.554+8.466-18.613+26.363+17.512-33.919+27.534-39.463-24.183+12.029-22.125+30.706)
tfBC <- (13.413+29.926-23.724-35.088-20.554-8.466+18.613+26.363+17.512+33.919-27.534-39.463-24.183-12.029+22.125+30.706)
tfABC <- (-13.413+29.926+23.724-35.088+20.554-8.466-18.613+26.363-17.512+33.919+27.534-39.463+24.183-12.029-22.125+30.706)
tfD <- (-13.413-29.926-23.724-35.088-20.554-8.466-18.613-26.363+17.512+33.919+27.534+39.463+24.183+12.029+22.125+30.706)
tfAD <- (13.413-29.926+23.724-35.088+20.554-8.466+18.613-26.363-17.512+33.919-27.534+39.463-24.183+12.029-22.125+30.706)
tfBD <- (13.413+29.926-23.724-35.088+20.554+8.466-18.613-26.363-17.512-33.919+27.534+39.463-24.183-12.029+22.125+30.706)
tfABD <- (-13.413+29.926+23.724-35.088-20.554+8.466+18.613-26.363+17.512-33.919-27.534+39.463+24.183-12.029-22.125+30.706)
tfCD <- (13.413+29.926+23.724+35.088-20.554-8.466-18.613-26.363-17.512-33.919-27.534-39.463+24.183+12.029+22.125+30.706)
tfACD <- (-13.413+29.926-23.724+35.088-20.554+8.466+18.613-26.363+17.512-33.919-27.534+39.463+24.183-12.029-22.125+30.706)
tfBCD <- (-13.413-29.926+23.724+35.088+20.554+8.466-18.613-26.363+17.512+33.919-27.534-39.463-24.183-12.029+22.125+30.706)
tfABCD <- (13.413-29.926-23.724+35.088-20.554+8.466+18.613-26.363-17.512+33.919+27.534-39.463+24.183-12.029-22.125+30.706)
tfe <- rbind(tfA,tfB,tfAB,tfC,tfAC,tfBC,tfABC,tfD,tfAD,tfBD,tfABD,tfCD,tfACD,tfBCD,tfABCD)
tfe
## [,1]
## tfA 48.302
## tfB 63.614
## tfAB 30.946
## tfC -57.540
## tfAC -64.124
## tfBC 1.536
## tfABC 50.200
## tfD 31.324
## tfAD 1.224
## tfBD 0.756
## tfABD 1.568
## tfCD -1.230
## tfACD 24.296
## tfBCD 0.570
## tfABCD 0.226
#main effects (X)
meA <- (1/8*2)*tfA
meB <- (1/8*2)*tfB
meAB <- (1/8*2)*tfAB
meC <- (1/8*2)*tfC
meAC <- (1/8*2)*tfAC
meBC <- (1/8*2)*tfBC
meABC <- (1/8*2)*tfABC
meD <- (1/8*2)*tfD
meAD <- (1/8*2)*tfAD
meBD <- (1/8*2)*tfBD
meABD <- (1/8*2)*tfABD
meCD <- (1/8*2)*tfCD
meACD <- (1/8*2)*tfACD
meBCD <- (1/8*2)*tfBCD
meABCD <- (1/8*2)*tfABCD
me <- rbind(meA,meB,meAB,meC,meAC,meBC,meABC,meD,meAD,meBD,meABD,meCD,meACD,meBCD,meABCD)
facefs <- data.frame(tfe,me)
facefs
3c
red <- trtdata[1:16,]
red
red.aov <- anova(aov(lm(y~Block+A*B*C*D, data=red)))
## Warning in anova.lm(aov(lm(y ~ Block + A * B * C * D, data = red))): ANOVA
## F-tests on an essentially perfect fit are unreliable
red.aov
trtdata.aov <-anova(aov(lm(y~Block+A*B*C*D, data=trtdata)))
trtdata.aov
3d
Based on the ANOVA table and factor effect estimates A,B,C,D,AB,AC,and ABC have significantly high (absolute magnitude) factor effects and the SS values are also significant.
3e
trt.lm <- lm(y~Block+A*B*C*D, data=trtdata)
trt.resid <- trt.lm$residuals
trt.fitted <- trt.lm$fitted.values
plot(trt.resid~trt.fitted, xlab="Fitted Values", ylab="Residuals", main="Residuals vs Fits Plot")
abline(h=0, lty=2)
From the residuals vs fits plot, we can see that the errors are normally distributed.
4a
run <- c(1:16)
A <- c(rep(c(-1,1),4))
B <- c(rep(c(-1,-1,1,1),2))
C <- c(rep(c(-1,1),each=4))
D <- c(rep(c(-1,1),each=8))
rep1 <- c(7.037,14.707,11.635,17.273,10.403,4.368,9.360,13.440,8.561,16.867,13.876,19.824,11.846,6.125,11.190,15.653)
rep2 <- c(6.376,15.219,12.089,17.815,10.151,4.098,9.253,12.923,8.951,17.052,13.658,19.639,12.337,5.904,10.935,15.053)
labels <- c("(1)","a","b","ab","c","ac","bc","abc","d","ad","bd","abd","cd","acd","bcd","abcd")
rep<-rep(c("rep1","rep2"),c(8,8))
total <- rep1+rep2
data <- as.data.frame(cbind(run,A,B,C,D,rep1,rep2,labels,total))
data