# Homework 11
# 6.8
Time<-c(rep(12,6),rep(18,6),rep(12,6),rep(18,6))
Cm<-c(rep(1,6),rep(1,6),rep(2,6),rep(2,6))
obs_1<-c(21,22,
23,28,
20,26,
37,39,
38,38,
35,36,
25,26,
24,25,
29,27,
31,34,
29,33,
30,35)
Time<-as.factor(Time)
Cm<-as.factor(Cm)
dat_1<-data.frame(Time,Cm,obs_1)
dat_1
## Time Cm obs_1
## 1 12 1 21
## 2 12 1 22
## 3 12 1 23
## 4 12 1 28
## 5 12 1 20
## 6 12 1 26
## 7 18 1 37
## 8 18 1 39
## 9 18 1 38
## 10 18 1 38
## 11 18 1 35
## 12 18 1 36
## 13 12 2 25
## 14 12 2 26
## 15 12 2 24
## 16 12 2 25
## 17 12 2 29
## 18 12 2 27
## 19 18 2 31
## 20 18 2 34
## 21 18 2 29
## 22 18 2 33
## 23 18 2 30
## 24 18 2 35
model_1 <- aov(obs_1~Time+Cm+Time*Cm, data = dat_1)
summary(model_1)
## Df Sum Sq Mean Sq F value Pr(>F)
## Time 1 590.0 590.0 115.506 9.29e-10 ***
## Cm 1 9.4 9.4 1.835 0.190617
## Time:Cm 1 92.0 92.0 18.018 0.000397 ***
## Residuals 20 102.2 5.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(model_1,1)
plot(model_1,2)
The factor Time and the interaction between Time and Culture Medium are significant model terms.
The normal Q-Q plot shows an approximately normal distribution of the residuals, and the residuals-versus-fitted plot suggests roughly constant variance.
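To help interpret the significant interaction between Time and Culture Medium, we can plot the cell means; a minimal sketch using the data frame built above (the plot labels are my own):
# Interaction plot of the mean response by Time and Culture Medium;
# non-parallel lines indicate that the effect of Time depends on the medium.
with(dat_1, interaction.plot(x.factor = Time, trace.factor = Cm, response = obs_1,
                             xlab = "Time", trace.label = "Culture medium", ylab = "Mean response"))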
# 6.12
A<-c(-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1,1,1,1,1)
B<-c(-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1)
obs<-c(14.037,16.165,13.972,13.907,
13.880,13.860,14.032,13.914,
14.821,14.757,14.843,14.878,
14.888,14.921,14.415,14.932)
A<-as.factor(A)
B<-as.factor(B)
dat<-data.frame(A,B,obs)
dat
## A B obs
## 1 -1 -1 14.037
## 2 -1 -1 16.165
## 3 -1 -1 13.972
## 4 -1 -1 13.907
## 5 1 -1 13.880
## 6 1 -1 13.860
## 7 1 -1 14.032
## 8 1 -1 13.914
## 9 -1 1 14.821
## 10 -1 1 14.757
## 11 -1 1 14.843
## 12 -1 1 14.878
## 13 1 1 14.888
## 14 1 1 14.921
## 15 1 1 14.415
## 16 1 1 14.932
mod<-lm(obs~A*B,data=dat) # A*B expands to the A and B main effects plus the A:B interaction
coef(mod)
## (Intercept) A1 B1 A1:B1
## 14.52025 -0.59875 0.30450 0.56300
model<-aov(obs~A+B+A*B,data = dat)
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 0.403 0.4026 1.262 0.2833
## B 1 1.374 1.3736 4.305 0.0602 .
## A:B 1 0.317 0.3170 0.994 0.3386
## Residuals 12 3.828 0.3190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(model,1)
plot(model,2)
Observation #2 falls well outside the rest of the points in both the normal probability plot and the plot of residuals versus fitted values.
One approach to dealing with this outlier is to replace it with the average of the other observations from the same treatment combination and refit the model.
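A minimal sketch of that fix (the names dat_fix and model_fix are mine), replacing observation #2 with the mean of the other three runs in its A = -1, B = -1 cell and refitting:
# Rows 1-4 of dat form the A = -1, B = -1 cell; row 2 holds the suspect value (16.165)
dat_fix <- dat
dat_fix$obs[2] <- mean(dat$obs[c(1, 3, 4)])
model_fix <- aov(obs ~ A * B, data = dat_fix)
summary(model_fix)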
obs <- c(10,0,4,0,0,5,6.5,16.5,4.5,19.5,15,41.5,8,21.5,0,18,18,16.5,6,10,0,20.5,18.5,4.5,18,18,16,39,4.5,10.5,0,5,14,4.5,1,34,18.5,18,7.5,0,14.5,16,8.5,6.5,6.5,6.5,0,7,12.5,17.5,14.5,11,19.5,20,6,23.5,10,5.5,0,3.5,10,0,4.5,10,19,20.5,12,25.5,16,29.5,0,8,0,10,0.5,7,13,15.5,1,32.5,16,17.5,14,21.5,15,19,10,8,17.5,7,9,8.5,41,24,4,18.5,18.5,33,5,0,11,10,0,8,6,36,3,36,14,16,6.5,8)
A <- c(-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1)
B <- c(rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2))
c <- c(rep(1,4),rep(-1,4),rep(1,4),rep(-1,4))
d <- c(rep(1,8),rep(-1,8))
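# Note: A, B, c, and d each list the 16 treatment combinations once;
# data.frame() recycles them across the 112 observations (7 replicates per combination).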
dat <- data.frame(A,B,c,d,obs)
model <- aov(obs~A*B*c*d,data = dat)
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## A 1 917 917.1 10.588 0.00157 **
## B 1 388 388.1 4.481 0.03686 *
## c 1 145 145.1 1.676 0.19862
## d 1 1 1.4 0.016 0.89928
## A:B 1 219 218.7 2.525 0.11538
## A:c 1 12 11.9 0.137 0.71178
## B:c 1 115 115.0 1.328 0.25205
## A:d 1 94 93.8 1.083 0.30066
## B:d 1 56 56.4 0.651 0.42159
## c:d 1 2 1.6 0.019 0.89127
## A:B:c 1 7 7.3 0.084 0.77294
## A:B:d 1 113 113.0 1.305 0.25623
## A:c:d 1 39 39.5 0.456 0.50121
## B:c:d 1 34 33.8 0.390 0.53386
## A:B:c:d 1 96 95.6 1.104 0.29599
## Residuals 96 8316 86.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
As evident from the ANOVA results, only the length of the putt and the type of putter (factors A and B) have a significant effect (\(\alpha=0.05\)).
plot(model,1)
From this plot we can see that the assumption of constant variance may not be satisfied here.
plot(model,2)
From this plot it seems that the assumption of normality may be violated too.
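To see the direction of the two significant effects, the level means can be tabulated; a minimal sketch (assuming, as in the discussion above, that A encodes the length of the putt and B the type of putter):
# Mean response at each level of A and of B
aggregate(obs ~ A, data = dat, FUN = mean)
aggregate(obs ~ B, data = dat, FUN = mean)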
obs1 <- c(1.92,11.28,1.09,5.75,2.13,9.53,1.03,5.35,1.60,11.73,1.16,4.68,2.16,9.11,1.07,5.30)
A1 <- c(-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1)
B1 <- c(rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2))
C1 <- c(rep(-1,4),rep(1,4),rep(-1,4),rep(1,4))
D1 <- c(rep(-1,8),rep(1,8))
dat1 <- data.frame(A1,B1,C1,D1,obs1)
library(DoE.base)
## Warning: package 'DoE.base' was built under R version 4.2.2
## Loading required package: grid
## Loading required package: conf.design
## Registered S3 method overwritten by 'DoE.base':
## method from
## factorize.factor conf.design
##
## Attaching package: 'DoE.base'
## The following objects are masked from 'package:stats':
##
## aov, lm
## The following object is masked from 'package:graphics':
##
## plot.design
## The following object is masked from 'package:base':
##
## lengths
model1 <- lm(obs1~A1*B1*C1*D1,data = dat1)
coef(model1)
## (Intercept) A1 B1 C1 D1 A1:B1
## 4.680625 3.160625 -1.501875 -0.220625 -0.079375 -1.069375
## A1:C1 B1:C1 A1:D1 B1:D1 C1:D1 A1:B1:C1
## -0.298125 0.229375 -0.056875 -0.046875 0.029375 0.344375
## A1:B1:D1 A1:C1:D1 B1:C1:D1 A1:B1:C1:D1
## -0.096875 -0.010625 0.094375 0.141875
halfnormal(model1)
##
## Significant effects (alpha=0.05, Lenth method):
## [1] A1 B1 A1:B1 A1:B1:C1
qqnorm(coef(model1))
qqline(coef(model1))
The effect estimates for each factor are shown above. From the half-normal plot and the normal probability plot of the coefficients, factors A and B, and possibly the A:B interaction, appear to be significant.
aov_mod <- aov(obs1~A1*B1,data = dat1)
plot(aov_mod)
## hat values (leverages) are all = 0.25
## and there are no factor predictors; no plot no. 5
After dropping factors C and D, these plots suggest that neither the normality assumption nor the constant-variance assumption is satisfied.
obs_ln <- log(obs1)
mod <- lm(obs_ln~A1*B1*C1)
coef(mod)
## (Intercept) A1 B1 C1 A1:B1 A1:C1
## 1.185417116 0.812870345 -0.314277554 -0.006408558 -0.024684570 -0.039723700
## B1:C1 A1:B1:C1
## -0.004225796 0.063434408
halfnormal(mod)
##
## Significant effects (alpha=0.05, Lenth method):
## [1] A1 B1 A1:B1:C1 A1:C1 e1 e3 A1:B1
qqnorm(coef(mod))
qqline(coef(mod))
After the log transformation, the estimated A:B interaction is much smaller than the A and B main effects, so we can fit a simpler model with only the two main effects.
aov_mod <- aov(obs_ln~A1+B1)
summary(aov_mod)
## Df Sum Sq Mean Sq F value Pr(>F)
## A1 1 10.572 10.572 962.9 1.41e-13 ***
## B1 1 1.580 1.580 143.9 2.09e-08 ***
## Residuals 13 0.143 0.011
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(aov_mod)
## hat values (leverages) are all = 0.1875
## and there are no factor predictors; no plot no. 5
The log transformation has improved the results.
Here are the coefficients for a linear model with only factors A and B and the log-transformed response.
mod1 <- lm(obs_ln~A1+B1)
coef(mod1)
## (Intercept) A1 B1
## 1.1854171 0.8128703 -0.3142776
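Because the model was fit to the log of the observations, predictions from mod1 should be exponentiated to return to the original scale; a minimal sketch (the factor setting is my own example):
# Predicted response at A1 = +1, B1 = -1, back-transformed from the log scale
new_run <- data.frame(A1 = 1, B1 = -1)
exp(predict(mod1, newdata = new_run))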
y <- c(8.11,5.56,5.77,5.82,9.17,7.8,3.23,5.69,8.82,14.23,9.2,8.94,8.68,11.49,6.25,9.12,7.93,5,7.47,12,9.86,3.65,6.4,11.61,12.43,17.55,8.87,25.38,13.06,18.85,11.78,26.05)
A2 <- c(-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1)
B2 <- c(rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2))
C2 <- c(rep(-1,4),rep(1,4),rep(-1,4),rep(1,4),rep(-1,4),rep(1,4),rep(-1,4),rep(1,4))
D2 <- c(rep(-1,8),rep(1,8),rep(-1,8),rep(1,8))
E2 <- c(rep(-1,16),rep(1,16))
dat2 <- data.frame(A2,B2,C2,D2,E2,y)
model3 <- aov(y~A2*B2*C2*D2*E2,data = dat2)
halfnormal(model3)
##
## Significant effects (alpha=0.05, Lenth method):
## [1] D2 E2 A2:D2 A2 D2:E2 B2:E2 A2:B2 A2:B2:E2
##
## [9] A2:E2 A2:D2:E2
This half-normal plot suggests that factor C is not important: neither C nor any interaction involving C appears among the effects flagged as significant.
model4 <- aov(y~A2*B2*D2*E2,data = dat2)
plot(model4)
## hat values (leverages) are all = 0.5
## and there are no factor predictors; no plot no. 5
After dropping factor C, the residual plots above suggest that the constant-variance assumption may be violated.
The remaining design involves the four factors A, B, D, and E, with two observations per treatment combination.
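As an optional check on the constant-variance concern, analogous to the log transformation used earlier, one could refit on the log scale and re-inspect the diagnostics; a minimal sketch:
# Refit with a log-transformed response (all y values are positive) and re-check the residual plots
model4_log <- aov(log(y) ~ A2 * B2 * D2 * E2, data = dat2)
plot(model4_log, 1)
plot(model4_log, 2)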
summary(model4)
## Df Sum Sq Mean Sq F value Pr(>F)
## A2 1 83.56 83.56 57.233 1.14e-06 ***
## B2 1 0.06 0.06 0.041 0.841418
## D2 1 285.78 285.78 195.742 2.16e-10 ***
## E2 1 153.17 153.17 104.910 1.97e-08 ***
## A2:B2 1 48.93 48.93 33.514 2.77e-05 ***
## A2:D2 1 88.88 88.88 60.875 7.66e-07 ***
## B2:D2 1 0.01 0.01 0.004 0.950618
## A2:E2 1 33.76 33.76 23.126 0.000193 ***
## B2:E2 1 52.71 52.71 36.103 1.82e-05 ***
## D2:E2 1 61.80 61.80 42.328 7.24e-06 ***
## A2:B2:D2 1 3.82 3.82 2.613 0.125501
## A2:B2:E2 1 44.96 44.96 30.794 4.40e-05 ***
## A2:D2:E2 1 26.01 26.01 17.815 0.000650 ***
## B2:D2:E2 1 0.05 0.05 0.035 0.854935
## A2:B2:D2:E2 1 5.31 5.31 3.634 0.074735 .
## Residuals 16 23.36 1.46
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From these results we can conclude that factors A, D, E, and interactions AB, AD, AE, BE, DE, ABE, and ADE are significant (\(\alpha=0.05\)).
lm_mod <- lm(y~A2+D2+E2+A2*B2+A2*E2+A2*D2+B2*E2+D2*E2+A2*B2*E2+A2*D2*E2,data = dat2)
coef(lm_mod)
## (Intercept) A2 D2 E2 B2 A2:B2
## 10.1803125 1.6159375 2.9884375 2.1878125 0.0434375 1.2365625
## A2:E2 A2:D2 E2:B2 D2:E2 A2:E2:B2 A2:D2:E2
## 1.0271875 1.6665625 1.2834375 1.3896875 1.1853125 0.9015625
These are the coefficients of the linear model containing the significant terms; the B main effect appears because it is carried along by the A2*B2 and B2*E2 terms in the formula.
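As an illustration of how these coefficients combine, here is a sketch of predicting the response at one setting (A, B, D, and E all at the +1 level; the choice of setting is my own example):
# Predicted y at A2 = B2 = D2 = E2 = +1; predict() evaluates the interaction terms automatically
new_point <- data.frame(A2 = 1, B2 = 1, D2 = 1, E2 = 1)
predict(lm_mod, newdata = new_point)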