Problem 6.8

#Homework 11
#6.8
Time<-c(rep(12,6),rep(18,6),rep(12,6),rep(18,6))
Cm<-c(rep(1,6),rep(1,6),rep(2,6),rep(2,6))
obs_1<-c(21,22,
       23,28,
       20,26,
       37,39,
       38,38,
       35,36,
       25,26,
       24,25,
       29,27,
       31,34,
       29,33,
       30,35)
Time<-as.factor(Time)
Cm<-as.factor(Cm)
dat_1<-data.frame(Time,Cm,obs_1)
dat_1
##    Time Cm obs_1
## 1    12  1    21
## 2    12  1    22
## 3    12  1    23
## 4    12  1    28
## 5    12  1    20
## 6    12  1    26
## 7    18  1    37
## 8    18  1    39
## 9    18  1    38
## 10   18  1    38
## 11   18  1    35
## 12   18  1    36
## 13   12  2    25
## 14   12  2    26
## 15   12  2    24
## 16   12  2    25
## 17   12  2    29
## 18   12  2    27
## 19   18  2    31
## 20   18  2    34
## 21   18  2    29
## 22   18  2    33
## 23   18  2    30
## 24   18  2    35
model_1 <- aov(obs_1~Time+Cm+Time*Cm, data = dat_1)
summary(model_1)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Time         1  590.0   590.0 115.506 9.29e-10 ***
## Cm           1    9.4     9.4   1.835 0.190617    
## Time:Cm      1   92.0    92.0  18.018 0.000397 ***
## Residuals   20  102.2     5.1                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(model_1,1)

plot(model_1,2)

The factor Time and the interaction between Time and Culture Medium are significant model terms.

The residual plots shows a fairly normal distribution and the residuals looks to be constant.

Problem 6.12

#6.12
A<-c(-1,-1,-1,-1,1,1,1,1,-1,-1,-1,-1,1,1,1,1)
B<-c(-1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1)

obs<-c(14.037,16.165,13.972,13.907,
       13.880,13.860,14.032,13.914,
       14.821,14.757,14.843,14.878,
       14.888,14.921,14.415,14.932)
A<-as.factor(A)
B<-as.factor(B)
dat<-data.frame(A,B,obs)
dat
##     A  B    obs
## 1  -1 -1 14.037
## 2  -1 -1 16.165
## 3  -1 -1 13.972
## 4  -1 -1 13.907
## 5   1 -1 13.880
## 6   1 -1 13.860
## 7   1 -1 14.032
## 8   1 -1 13.914
## 9  -1  1 14.821
## 10 -1  1 14.757
## 11 -1  1 14.843
## 12 -1  1 14.878
## 13  1  1 14.888
## 14  1  1 14.921
## 15  1  1 14.415
## 16  1  1 14.932
mod<-lm(obs~A*B,data=dat) #It already gives A, B and C separately
coef(mod)
## (Intercept)          A1          B1       A1:B1 
##    14.52025    -0.59875     0.30450     0.56300
model<-aov(obs~A+B+A*B,data = dat)
summary(model)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## A            1  0.403  0.4026   1.262 0.2833  
## B            1  1.374  1.3736   4.305 0.0602 .
## A:B          1  0.317  0.3170   0.994 0.3386  
## Residuals   12  3.828  0.3190                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(model,1)

plot(model,2)

Observation #2 falls outside the groupings in the normal probability plot and the plot of residual versus predicted.

(d)

One approach to deal with the outlier is to replace it with the average of the observations from that group.

Q 6.21

a)

obs <- c(10,0,4,0,0,5,6.5,16.5,4.5,19.5,15,41.5,8,21.5,0,18,18,16.5,6,10,0,20.5,18.5,4.5,18,18,16,39,4.5,10.5,0,5,14,4.5,1,34,18.5,18,7.5,0,14.5,16,8.5,6.5,6.5,6.5,0,7,12.5,17.5,14.5,11,19.5,20,6,23.5,10,5.5,0,3.5,10,0,4.5,10,19,20.5,12,25.5,16,29.5,0,8,0,10,0.5,7,13,15.5,1,32.5,16,17.5,14,21.5,15,19,10,8,17.5,7,9,8.5,41,24,4,18.5,18.5,33,5,0,11,10,0,8,6,36,3,36,14,16,6.5,8)
A <- c(-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1)
B <- c(rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2))
c <- c(rep(1,4),rep(-1,4),rep(1,4),rep(-1,4))
d <- c(rep(1,8),rep(-1,8))
dat <- data.frame(A,B,c,d,obs)
model <- aov(obs~A*B*c*d,data = dat)
summary(model)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## A            1    917   917.1  10.588 0.00157 **
## B            1    388   388.1   4.481 0.03686 * 
## c            1    145   145.1   1.676 0.19862   
## d            1      1     1.4   0.016 0.89928   
## A:B          1    219   218.7   2.525 0.11538   
## A:c          1     12    11.9   0.137 0.71178   
## B:c          1    115   115.0   1.328 0.25205   
## A:d          1     94    93.8   1.083 0.30066   
## B:d          1     56    56.4   0.651 0.42159   
## c:d          1      2     1.6   0.019 0.89127   
## A:B:c        1      7     7.3   0.084 0.77294   
## A:B:d        1    113   113.0   1.305 0.25623   
## A:c:d        1     39    39.5   0.456 0.50121   
## B:c:d        1     34    33.8   0.390 0.53386   
## A:B:c:d      1     96    95.6   1.104 0.29599   
## Residuals   96   8316    86.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

As evident in the ANOVA results, only length of the put and the type of putter have a significant effect (\(\alpha=0.05\)).

b)

plot(model,1)

From this plot we can see that the assumption of constant variance may not be satisfied here.

plot(model,2)

From this plot it seems that the assumption of normality may be violated too.

6.36

obs1 <- c(1.92,11.28,1.09,5.75,2.13,9.53,1.03,5.35,1.60,11.73,1.16,4.68,2.16,9.11,1.07,5.30)
A1 <- c(-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1)
B1 <- c(rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2))
C1 <- c(rep(-1,4),rep(1,4),rep(-1,4),rep(1,4))
D1 <- c(rep(-1,8),rep(1,8))
dat1 <- data.frame(A1,B1,C1,D1,obs1)

a)

library(DoE.base)
## Warning: package 'DoE.base' was built under R version 4.2.2
## Loading required package: grid
## Loading required package: conf.design
## Registered S3 method overwritten by 'DoE.base':
##   method           from       
##   factorize.factor conf.design
## 
## Attaching package: 'DoE.base'
## The following objects are masked from 'package:stats':
## 
##     aov, lm
## The following object is masked from 'package:graphics':
## 
##     plot.design
## The following object is masked from 'package:base':
## 
##     lengths
model1 <- lm(obs1~A1*B1*C1*D1,data = dat1)
coef(model1)
## (Intercept)          A1          B1          C1          D1       A1:B1 
##    4.680625    3.160625   -1.501875   -0.220625   -0.079375   -1.069375 
##       A1:C1       B1:C1       A1:D1       B1:D1       C1:D1    A1:B1:C1 
##   -0.298125    0.229375   -0.056875   -0.046875    0.029375    0.344375 
##    A1:B1:D1    A1:C1:D1    B1:C1:D1 A1:B1:C1:D1 
##   -0.096875   -0.010625    0.094375    0.141875
halfnormal(model1)
## 
## Significant effects (alpha=0.05, Lenth method):
## [1] A1       B1       A1:B1    A1:B1:C1

qqnorm(coef(model1))
qqline(coef(model1))

The effects of each factor has been shown in the table. Furthermore, from the half-normal and normality plot we can guess that factors A and B may be significant.

b)

aov_mod <- aov(obs1~A1*B1,data = dat1)
plot(aov_mod)

## hat values (leverages) are all = 0.25
##  and there are no factor predictors; no plot no. 5

After dropping the factors D and C, these plots suggest that neither the assumption of normality nor assumption of constant variance may be satisfied here.

c)

obs_ln <- log(obs1)
mod <- lm(obs_ln~A1*B1*C1)
coef(mod)
##  (Intercept)           A1           B1           C1        A1:B1        A1:C1 
##  1.185417116  0.812870345 -0.314277554 -0.006408558 -0.024684570 -0.039723700 
##        B1:C1     A1:B1:C1 
## -0.004225796  0.063434408
halfnormal(mod)
## 
## Significant effects (alpha=0.05, Lenth method):
## [1] A1       B1       A1:B1:C1 A1:C1    e1       e3       A1:B1

qqnorm(coef(mod))
qqline(coef(mod))

Since these results suggest that the A and B interaction is no longer significant, we can make a simpler model.

aov_mod <- aov(obs_ln~A1+B1)
summary(aov_mod)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## A1           1 10.572  10.572   962.9 1.41e-13 ***
## B1           1  1.580   1.580   143.9 2.09e-08 ***
## Residuals   13  0.143   0.011                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(aov_mod)

## hat values (leverages) are all = 0.1875
##  and there are no factor predictors; no plot no. 5

The log transformation has improved the results.

d)

Here are the coefficients for a linear model, considering only A and B factors and log of observations.

mod1 <- lm(obs_ln~A+B)
coef(mod1)
## (Intercept)           A           B 
##   1.1854171   0.8128703   0.3142776

6.39

y <- c(8.11,5.56,5.77,5.82,9.17,7.8,3.23,5.69,8.82,14.23,9.2,8.94,8.68,11.49,6.25,9.12,7.93,5,7.47,12,9.86,3.65,6.4,11.61,12.43,17.55,8.87,25.38,13.06,18.85,11.78,26.05)
A2 <- c(-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1,-1,1)
B2 <- c(rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2),rep(-1,2),rep(1,2))
C2 <- c(rep(-1,4),rep(1,4),rep(-1,4),rep(1,4),rep(-1,4),rep(1,4),rep(-1,4),rep(1,4))
D2 <- c(rep(-1,8),rep(1,8),rep(-1,8),rep(1,8))
E2 <- c(rep(-1,16),rep(1,16))
dat2 <- data.frame(A2,B2,C2,D2,E2,y)

a)

model3 <- aov(y~A2*B2*C2*D2*E2,data = dat2)
halfnormal(model3)
## 
## Significant effects (alpha=0.05, Lenth method):
##  [1] D2       E2       A2:D2    A2       D2:E2    B2:E2    A2:B2    A2:B2:E2 
## 
##  [9] A2:E2    A2:D2:E2

This half-normal plot suggests that factor C may not be significant.

b)

model4 <- aov(y~A2*B2*D2*E2,data = dat2)
plot(model4)

## hat values (leverages) are all = 0.5
##  and there are no factor predictors; no plot no. 5

After dropping the factor C, if we analyze the above plots, they suggest that the constant variance assumption may be violated.

c)

If we drop this factor (C) we will have a three factor design left.

summary(model4)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## A2           1  83.56   83.56  57.233 1.14e-06 ***
## B2           1   0.06    0.06   0.041 0.841418    
## D2           1 285.78  285.78 195.742 2.16e-10 ***
## E2           1 153.17  153.17 104.910 1.97e-08 ***
## A2:B2        1  48.93   48.93  33.514 2.77e-05 ***
## A2:D2        1  88.88   88.88  60.875 7.66e-07 ***
## B2:D2        1   0.01    0.01   0.004 0.950618    
## A2:E2        1  33.76   33.76  23.126 0.000193 ***
## B2:E2        1  52.71   52.71  36.103 1.82e-05 ***
## D2:E2        1  61.80   61.80  42.328 7.24e-06 ***
## A2:B2:D2     1   3.82    3.82   2.613 0.125501    
## A2:B2:E2     1  44.96   44.96  30.794 4.40e-05 ***
## A2:D2:E2     1  26.01   26.01  17.815 0.000650 ***
## B2:D2:E2     1   0.05    0.05   0.035 0.854935    
## A2:B2:D2:E2  1   5.31    5.31   3.634 0.074735 .  
## Residuals   16  23.36    1.46                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

From these results we can conclude that factors A, D, E, and interactions AB, AD, AE, BE, DE, ABE, and ADE are significant (\(\alpha=0.05\)).

d)

lm_mod <- lm(y~A2+D2+E2+A2*B2+A2*E2+A2*D2+B2*E2+D2*E2+A2*B2*E2+A2*D2*E2,data = dat2)
coef(lm_mod)
## (Intercept)          A2          D2          E2          B2       A2:B2 
##  10.1803125   1.6159375   2.9884375   2.1878125   0.0434375   1.2365625 
##       A2:E2       A2:D2       E2:B2       D2:E2    A2:E2:B2    A2:D2:E2 
##   1.0271875   1.6665625   1.2834375   1.3896875   1.1853125   0.9015625

These are the coefficients for the linear model with the significant factors in this experiment.