RLabB

1a
A suitable design for this data is the Latin Square Design

1b
#suitable model \(Y_{ijk}\)=\(\mu\)+\(\alpha_i\)+\(\beta_j\)+\(\tau_k\)+\(\epsilon_{ijk}\)
#model definition
\(\mu\)= mean of effects of the 5 different ingredients
\(\alpha_i\)=ith batch of new material
\(\beta_j\)=jth day
\(\tau_k\)=effect of kth different ingredients (A,B,C,D,E)
##We assume that for the Latin Square Design, our three factors (ingredients,days,batch) do not interact.

batch <- c(rep("batch1",1), rep("batch2",1), rep("batch3",1), rep("batch4",1), rep("batch5",1))
day <- c(rep("day1",5), rep("day2",5), rep("day3",5), rep("day4",5), rep("day5",5))
ingredients <- c("A","C","B","D","E","B","E","A","C","D","D","A","C","E","B","C","D","E","B","A","E","B","D","A","C")
freq <- c(8,11,4,6,4,7,2,9,8,2,1,7,10,6,3,7,3,1,6,8,3,8,5,10,8)
mydata <- data.frame(batch,day,ingredients,freq)
mydata

matrix(mydata$ingredients,5,5)

##      [,1] [,2] [,3] [,4] [,5]
## [1,] "A"  "B"  "D"  "C"  "E" 
## [2,] "C"  "E"  "A"  "D"  "B" 
## [3,] "B"  "A"  "C"  "E"  "D" 
## [4,] "D"  "C"  "E"  "B"  "A" 
## [5,] "E"  "D"  "B"  "A"  "C"

f.mat <- matrix(mydata$freq,5,5)
f.mat

##      [,1] [,2] [,3] [,4] [,5]
## [1,]    8    7    1    7    3
## [2,]   11    2    7    3    8
## [3,]    4    9   10    1    5
## [4,]    6    8    6    6   10
## [5,]    4    2    3    8    8

Y... <- mean(f.mat)

#Yi..
Yi.. <- apply(f.mat,1,sum)
Yi..

## [1] 26 31 29 36 25

ai <- Yi../5
ai

## [1] 5.2 6.2 5.8 7.2 5.0

a1 <- 5.2-Y...
a1

## [1] -0.68

a2 <- 6.2-Y...
a2

## [1] 0.32

a3 <- 5.8-Y...
a3

## [1] -0.08

a4 <- 7.2-Y...
a4

## [1] 1.32

a5 <- 5-Y...
a5

## [1] -0.88

#Y.j.
Y.j. <- apply(f.mat, 2,sum)
Y.j.

## [1] 33 28 27 25 34

Bi <- Y.j./5
Bi

## [1] 6.6 5.6 5.4 5.0 6.8

b1 <- 6.6-Y...
b1

## [1] 0.72

b2 <- 5.6-Y...
b2

## [1] -0.28

b3 <- 5.4-Y...
b3

## [1] -0.48

b4 <- 5-Y...
b4

## [1] -0.88

b5 <- 6.8-Y...
b5

## [1] 0.92

y..1 <- (8+9+7+8+10)/5
y..2 <- (7+4+3+6+8)/5
y..3 <- (11+8+10+7+8)/5
y..4 <- (6+2+1+3+5)/5
y..5 <- (4+2+6+1+3)/5
t1 <- 8.4-Y...
t1

## [1] 2.52

t2 <- 5.6-Y...
t2

## [1] -0.28

t3 <- 8.8-Y...
t3

## [1] 2.92

t4 <- 3.4-Y...
t4

## [1] -2.48

t5 <- 3.2-Y...
t5

## [1] -2.68

The parameter \(\hat{\mu}\) is 5.88, \(\alpha_1\) is-0.68, \(\alpha_2\) is 0.32, \(\alpha_3\) is -0.08, \(\alpha_4\) is 1.32, \(\alpha_5\) is -0.88, \(\beta_1\) is 0.72, \(\beta_2\) is -0.28, \(\beta_3\) is -0.48, \(\beta_4\) is -0.88, \(\beta_5\) is 0.92, \(\tau_1\) is 2.52, \(\tau_2\) is -0.28, \(\tau_3\) is 2.92, \(\tau_4\) is -2.48, \(\tau_5\) is -2.68, and \(e_{ijk}\) is 178.96.

myfit <- lm(freq~batch+day+ingredients, mydata)
anova(myfit)

anova(myfit)

\(H_{0A}\):\(\alpha_i\)=0 \(H_{0B}\):\(\beta_j\)=0 \(H_{0T}\):\(\tau_k\)=0 From the p-value of batch and day which is less than \(\alpha\)=0.05, we fail to reject the null hypotheses, therefore, the number of runs made in a day and the different batches of material are not significant. Since the p-value of ingredients is less than 0.05, we reject the null and conclude that there is significant effect of ingredients.

library(readr)
memory <- read_table2("/Users/beckysu/Desktop/memory.txt")

## Parsed with column specification:
## cols(
##   wordtype = col_integer(),
##   distract = col_integer(),
##   trtmt = col_integer(),
##   y = col_integer(),
##   z = col_double()
## )

#linearity
memory.lm <- lm(y~trtmt, data=memory)
m.resid <- memory.lm$residuals
m.fitted <- memory.lm$fitted.values
plot(m.resid~m.fitted, xlab="Fitted Values", ylab="Residuals", main="Residuals vs Fits Plot")

plot(memory.lm, which=1)

#equal variance
memory.lm <- lm(y~trtmt, data=memory)
m.resid <- memory.lm$residuals
m.fitted <- memory.lm$fitted.values
plot(m.resid~m.fitted, xlab="Fitted Values", ylab="Residuals", main="Residuals vs Fits Plot")
abline(h=0, lty=2)

#normality
yhat <- fitted(memory.lm)
e <- trtmt~yhat
plot(memory.lm, which=2)

#variance stabilizing transformation
memory.aov <- aov(y~trtmt,data=memory)
yij <- tapply(memory$y, memory$trtmt, mean)
sij <- tapply(memory$y, memory$trtmt, var)
plot(log(yij),log(sij))
lin <- lm(log(sij)~log(yij))
lin

## 
## Call:
## lm(formula = log(sij) ~ log(yij))
## 
## Coefficients:
## (Intercept)     log(yij)  
##      -6.993        3.372

abline(a=lin$coefficients[1],b=lin$coefficients[2])

q <- lin$coefficients[2]
q

## log(yij) 
## 3.372173

#recheck assumptions
y.new <- (memory$y)^(1-q/2)
aov.new <- aov(y.new~memory$trtmt)
plot(aov.new$fitted.values,aov.new$residuals, xlab="Fitted Values",ylab = "Residuals")

A <- c("-","+","-","+","-","+","-","+","-","+","-","+","-","+","-","+")
B <- c("-","-","+","+","-","-","+","+","-","-","+","+","-","-","+","+")
AB <- c("+","-","-","+","+","-","-","+","+","-","-","+","+","-","-","+")
C <- c("-","-","-","-","+","+","+","+","-","-","-","-","+","+","+","+")
AC <- c("+","-","+","-","-","+","-","+","+","-","+","-","-","+","-","+")
BC <- c("+","+","-","-","-","-","+","+","+","+","-","-","-","-","+","+")
ABC <- c("-","+","+","-","+","-","-","+","-","+","+","-","+","-","-","+")
D <- c("-","-","-","-","-","-","-","-","+","+","+","+","+","+","+","+")
AD <- c("+","-","+","-","+","-","+","-","-","+","-","+","-","+","-","+")
BD <- c("+","+","-","-","+","+","-","-","-","-","+","+","-","-","+","+")
ABD <- c("-","+","+","-","-","+","+","-","+","-","-","+","+","-","-","+")
CD <- c("+","+","+","+","-","-","-","-","-","-","-","-","+","+","+","+")
ACD <- c("-","+","-","+","-","+","+","-","+","-","-","+","+","-","-","+")
BCD <- c("-","-","+","+","+","+","-","-","+","+","-","-","-","-","+","+")
ABCD <- c("+","-","-","+","-","+","+","-","-","+","+","-","+","-","-","+")

mydata2 <- data.frame(A,B,AB,C,AC,BC,ABC,D,AD,BD,ABD,CD,ACD,BCD,ABCD)
mydata2

A <- c("-","+","-","+","-","+","-","+","-","+","-","+","-","+","-","+")
B <- c("-","-","+","+","-","-","+","+","-","-","+","+","-","-","+","+")
C <- c("-","-","-","-","+","+","+","+","-","-","-","-","+","+","+","+")
D <- c("-","-","-","-","-","-","-","-","+","+","+","+","+","+","+","+")
TreatmentComb <- c("(1)","a","b","ab","c","ac","bc","abc","d","ad","bd","abd","cd","acd","bcd","abcd")
I <- c(7.037,14.707,11.635,17.273,10.403,4.368,9.360,13.440,8.561,16.867,13.876,19.824,11.846,6.125,11.190,15.653)
II <- c(6.376,15.219,12.089,17.815,10.151,4.098,9.253,12.923,8.951,17.052,13.658,19.639,12.337,5.904,10.935,15.053)
y <- c(7.037,14.707,11.635,17.273,10.403,4.368,9.360,13.440,8.561,16.867,13.876,19.824,11.846,6.125,11.190,15.653,6.376,15.219,12.089,17.815,10.151,4.098,9.253,12.923,8.951,17.052,13.658,19.639,12.337,5.904,10.935,15.053)
Total <- I+II
Block <- c(rep(1,16), rep(2,16))
trtdata <- data.frame(A,B,C,D,TreatmentComb,y,Block)
trtdata

#total factor effects [X]
TreatmentComb <- c("(1)","a","b","ab","c","ac","bc","abc","d","ad","bd","abd","cd","acd","bcd","abcd")
tfA <- (-13.413+29.926-23.724+35.088-20.554+8.466-18.613+26.363-17.512+33.919-27.534+39.463-24.183+12.029-22.125+30.706)
tfB <- (-13.413-29.926+23.724+35.088-20.554-8.466+18.613+26.363-17.512-33.919+27.534+39.463-24.183-12.029+22.125+30.706)
tfAB <- (13.413-29.926-23.724+35.088+20.554-8.466-18.613+26.363+17.512-33.919-27.534+39.463+24.183-12.029-22.125+30.706)
tfC <- (-13.413-29.926-23.724-35.088+20.554+8.466+18.613+26.363-17.512-33.919-27.534-39.463+24.183+12.029+22.125+30.706)
tfAC <- (13.413-29.926+23.724-35.088-20.554+8.466-18.613+26.363+17.512-33.919+27.534-39.463-24.183+12.029-22.125+30.706)
tfBC <- (13.413+29.926-23.724-35.088-20.554-8.466+18.613+26.363+17.512+33.919-27.534-39.463-24.183-12.029+22.125+30.706)
tfABC <- (-13.413+29.926+23.724-35.088+20.554-8.466-18.613+26.363-17.512+33.919+27.534-39.463+24.183-12.029-22.125+30.706)
tfD <- (-13.413-29.926-23.724-35.088-20.554-8.466-18.613-26.363+17.512+33.919+27.534+39.463+24.183+12.029+22.125+30.706)
tfAD <- (13.413-29.926+23.724-35.088+20.554-8.466+18.613-26.363-17.512+33.919-27.534+39.463-24.183+12.029-22.125+30.706)
tfBD <- (13.413+29.926-23.724-35.088+20.554+8.466-18.613-26.363-17.512-33.919+27.534+39.463-24.183-12.029+22.125+30.706)
tfABD <- (-13.413+29.926+23.724-35.088-20.554+8.466+18.613-26.363+17.512-33.919-27.534+39.463+24.183-12.029-22.125+30.706)
tfCD <- (13.413+29.926+23.724+35.088-20.554-8.466-18.613-26.363-17.512-33.919-27.534-39.463+24.183+12.029+22.125+30.706)
tfACD <- (-13.413+29.926-23.724+35.088-20.554+8.466+18.613-26.363+17.512-33.919-27.534+39.463+24.183-12.029-22.125+30.706)
tfBCD <- (-13.413-29.926+23.724+35.088+20.554+8.466-18.613-26.363+17.512+33.919-27.534-39.463-24.183-12.029+22.125+30.706)
tfABCD <- (13.413-29.926-23.724+35.088-20.554+8.466+18.613-26.363-17.512+33.919+27.534-39.463+24.183-12.029-22.125+30.706)
tfe <- rbind(tfA,tfB,tfAB,tfC,tfAC,tfBC,tfABC,tfD,tfAD,tfBD,tfABD,tfCD,tfACD,tfBCD,tfABCD)
tfe

##           [,1]
## tfA     48.302
## tfB     63.614
## tfAB    30.946
## tfC    -57.540
## tfAC   -64.124
## tfBC     1.536
## tfABC   50.200
## tfD     31.324
## tfAD     1.224
## tfBD     0.756
## tfABD    1.568
## tfCD    -1.230
## tfACD   24.296
## tfBCD    0.570
## tfABCD   0.226

#main effects (X)
meA <- (1/8*2)*tfA
meB <- (1/8*2)*tfB
meAB <- (1/8*2)*tfAB
meC <- (1/8*2)*tfC
meAC <- (1/8*2)*tfAC
meBC <- (1/8*2)*tfBC
meABC <- (1/8*2)*tfABC
meD <- (1/8*2)*tfD
meAD <- (1/8*2)*tfAD
meBD <- (1/8*2)*tfBD
meABD <- (1/8*2)*tfABD
meCD <- (1/8*2)*tfCD
meACD <- (1/8*2)*tfACD
meBCD <- (1/8*2)*tfBCD
meABCD <- (1/8*2)*tfABCD
me <- rbind(meA,meB,meAB,meC,meAC,meBC,meABC,meD,meAD,meBD,meABD,meCD,meACD,meBCD,meABCD)
facefs <- data.frame(tfe,me)
facefs

red <- trtdata[1:16,]
red

red.aov <- anova(aov(lm(y~Block+A*B*C*D, data=red)))

## Warning in anova.lm(aov(lm(y ~ Block + A * B * C * D, data = red))): ANOVA
## F-tests on an essentially perfect fit are unreliable

red.aov

trtdata.aov <-anova(aov(lm(y~Block+A*B*C*D, data=trtdata)))
trtdata.aov

3d
Based on the ANOVA table and factor effect estimates A,B,C,D,AB,AC,and ABC have significantly high (absolute magnitude) factor effects and the SS values are also significant.

trt.lm <- lm(y~Block+A*B*C*D, data=trtdata)
trt.resid <- trt.lm$residuals
trt.fitted <- trt.lm$fitted.values
plot(trt.resid~trt.fitted, xlab="Fitted Values", ylab="Residuals", main="Residuals vs Fits Plot")
abline(h=0, lty=2)

From the residuals vs fits plot, we can see that the errors are normally distributed.

run <- c(1:16)
A <- c(rep(c(-1,1),4))
B <- c(rep(c(-1,-1,1,1),2))
C <- c(rep(c(-1,1),each=4))
D <- c(rep(c(-1,1),each=8))
rep1 <- c(7.037,14.707,11.635,17.273,10.403,4.368,9.360,13.440,8.561,16.867,13.876,19.824,11.846,6.125,11.190,15.653)
rep2 <- c(6.376,15.219,12.089,17.815,10.151,4.098,9.253,12.923,8.951,17.052,13.658,19.639,12.337,5.904,10.935,15.053)
labels <- c("(1)","a","b","ab","c","ac","bc","abc","d","ad","bd","abd","cd","acd","bcd","abcd")
rep<-rep(c("rep1","rep2"),c(8,8))
total <- rep1+rep2
data <- as.data.frame(cbind(run,A,B,C,D,rep1,rep2,labels,total))
data

RLabB

Becky Su

3/2/2019