1 Assignment on Sample size determination for ANOVA

1.1 Question 1

Given Data

# Question 1: shared inputs for the sample-size calculations
k <- 4        # number of populations (groups)
Var <- 4.5    # variance of the data
alpha <- 0.05 # significance level
p <- 0.8      # target power

1.2 Question 1a

Assuming each of the three variability cases (minimum, intermediate and maximum) with alpha = 0.05, a power analysis for one-way ANOVA is performed to find the required sample size per group.

# 1a: required sample size per group for a maximum mean difference of 1
library(pwr) # provides pwr.anova.test()
diff1 <- 1 # maximum difference between group means to detect
# NOTE(review): Var (defined above as 4.5) is never used. The textbook effect
# size divides the difference by sigma = sqrt(Var); as written, diff1 is
# treated as already standardized -- confirm this is intended.
# Minimum variability: f = diff * sqrt(1 / (2k))
pwr.anova.test(
  k = k, n = NULL, f = diff1 * sqrt(1 / (2 * k)),
  sig.level = alpha, power = p
)
## 
##      Balanced one-way analysis of variance power calculation 
## 
##               k = 4
##               n = 22.806
##               f = 0.3535534
##       sig.level = 0.05
##           power = 0.8
## 
## NOTE: n is number in each group
# Intermediate variability: f = (diff / 2) * sqrt((k + 1) / (3 * (k - 1)))
# k and sig.level reuse the variables defined for Question 1 so the group
# count in the call cannot drift from the one used inside f.
pwr.anova.test(
  k = k, n = NULL, f = (diff1 / 2) * sqrt((k + 1) / (3 * (k - 1))),
  sig.level = alpha, power = p
)
## 
##      Balanced one-way analysis of variance power calculation 
## 
##               k = 4
##               n = 20.62807
##               f = 0.372678
##       sig.level = 0.05
##           power = 0.8
## 
## NOTE: n is number in each group
# Maximum variability with an even number of populations (k = 4): f = diff / 2
# k and sig.level reuse the variables defined for Question 1.
pwr.anova.test(
  k = k, n = NULL, f = diff1 / 2,
  sig.level = alpha, power = p
)
## 
##      Balanced one-way analysis of variance power calculation 
## 
##               k = 4
##               n = 11.92611
##               f = 0.5
##       sig.level = 0.05
##           power = 0.8
## 
## NOTE: n is number in each group
  • Comment: From the results of the analysis (n rounded up to the next whole number),
    • Number of samples required per group under minimum variability = 23
    • Number of samples required per group under intermediate variability = 21
    • Number of samples required per group under maximum variability = 12

1.3 Question 1b

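Assuming all three cases of variability with alpha 0.1 and a difference of 0.5, the power analysis is repeated (note: the code below was run with sig.level = 0.05).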

# 1b: same analysis for a maximum mean difference of 0.5
diff2 <- 0.5
# NOTE(review): the section text says alpha 0.1, but the 1b calls use
# sig.level = 0.05 -- confirm which level is intended.
# Minimum variability: f = diff * sqrt(1 / (2k))
pwr.anova.test(
  k = k, n = NULL, f = diff2 * sqrt(1 / (2 * k)),
  sig.level = 0.05, power = p
)
## 
##      Balanced one-way analysis of variance power calculation 
## 
##               k = 4
##               n = 88.20348
##               f = 0.1767767
##       sig.level = 0.05
##           power = 0.8
## 
## NOTE: n is number in each group
# Intermediate variability: f = (diff / 2) * sqrt((k + 1) / (3 * (k - 1)))
# k reuses the Question 1 variable so it matches the k used inside f.
pwr.anova.test(
  k = k, n = NULL, f = (diff2 / 2) * sqrt((k + 1) / (3 * (k - 1))),
  sig.level = 0.05, power = p
)
## 
##      Balanced one-way analysis of variance power calculation 
## 
##               k = 4
##               n = 79.4821
##               f = 0.186339
##       sig.level = 0.05
##           power = 0.8
## 
## NOTE: n is number in each group
# Maximum variability with an even number of populations (k = 4): f = diff / 2
# k reuses the Question 1 variable instead of a repeated literal.
pwr.anova.test(
  k = k, n = NULL, f = diff2 / 2,
  sig.level = 0.05, power = p
)
## 
##      Balanced one-way analysis of variance power calculation 
## 
##               k = 4
##               n = 44.59927
##               f = 0.25
##       sig.level = 0.05
##           power = 0.8
## 
## NOTE: n is number in each group
  • Comment: From the results of the analysis (n rounded up to the next whole number),
    • Number of samples required per group under minimum variability = 89
    • Number of samples required per group under intermediate variability = 80
    • Number of samples required per group under maximum variability = 45

1.4 Question 2

Given Data

# Question 2: observed life for each of the four fluid types (6 per type)
pop1 <- c(17.6, 18.9, 16.3, 17.4, 20.1, 21.6)
pop2 <- c(16.9, 15.3, 18.6, 17.1, 19.5, 20.3)
pop3 <- c(21.4, 23.6, 19.4, 18.5, 20.5, 22.3)
pop4 <- c(19.3, 21.1, 16.9, 17.5, 18.3, 19.8)

# Long format: one row per observation, with the fluid type as a factor
data <- data.frame(
  Fluid_Type = factor(rep(1:4, each = 6)),
  Life = c(pop1, pop2, pop3, pop4)
)

##Question 2a

# 2a: achieved power of the experiment at a 0.1 significance level
# k and n are derived from the data (4 fluid types, 6 observations each)
# rather than repeated as literals.
# NOTE(review): sd(data$Life) is the overall standard deviation, which also
# contains between-group differences; a within-group estimate may be what is
# intended for the effect size -- confirm.
pwr.anova.test(
  k = nlevels(data$Fluid_Type),
  n = nrow(data) / nlevels(data$Fluid_Type),
  f = 1 / sd(data$Life),
  sig.level = 0.1, power = NULL
)
## 
##      Balanced one-way analysis of variance power calculation 
## 
##               k = 4
##               n = 6
##               f = 0.4890694
##       sig.level = 0.1
##           power = 0.5618141
## 
## NOTE: n is number in each group
  • Comment: The power is approximately 56% (0.5618).

##Question 2b

# 2b: one-way ANOVA of fluid life by fluid type
# (interactive `?aov` help lookup removed; it has no place in a script)
anova_result <- aov(Life ~ Fluid_Type, data = data)
summary(anova_result)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Fluid_Type   3  30.16   10.05   3.047 0.0525 .
## Residuals   20  65.99    3.30                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  • Comment: Our p-value is 0.0525, which is below the significance level of 0.10, so we reject the null hypothesis and conclude that there is a significant difference between the mean lives of the tested fluids.

##Question 2c

# 2c: diagnostic plots for the fitted ANOVA model
plot(x = anova_result)

  • Comment: The residuals appear approximately normal and the variances are almost equal.
    • Most values fit the expected pattern.

1.5 Question 2d

# 2d: Tukey pairwise comparisons at a 90% family-wise confidence level
# (interactive `?TukeyHSD` help lookup removed; it has no place in a script)
TukeyHSD(anova_result, conf.level = 0.9)
##   Tukey multiple comparisons of means
##     90% family-wise confidence level
## 
## Fit: aov(formula = Life ~ Fluid_Type, data = data)
## 
## $Fluid_Type
##           diff        lwr       upr     p adj
## 2-1 -0.7000000 -3.2670196 1.8670196 0.9080815
## 3-1  2.3000000 -0.2670196 4.8670196 0.1593262
## 4-1  0.1666667 -2.4003529 2.7336862 0.9985213
## 3-2  3.0000000  0.4329804 5.5670196 0.0440578
## 4-2  0.8666667 -1.7003529 3.4336862 0.8413288
## 4-3 -2.1333333 -4.7003529 0.4336862 0.2090635
# Plot the 90% family-wise confidence intervals for each pairwise difference
plot(
  TukeyHSD(x = anova_result, conf.level = 0.9)
)

- Comment: Only the interval for the 3-2 pair excludes zero (0.43 to 5.57, adjusted p = 0.044), so only fluid types 2 and 3 differ significantly.

2 Complete R Code

# Question 1: sample size determination for one-way ANOVA
library(pwr) # provides pwr.anova.test()

k <- 4        # number of populations (groups)
Var <- 4.5    # variance of the data
alpha <- 0.05 # significance level
p <- 0.8      # target power

# 1a: required sample size per group for a maximum mean difference of 1
diff1 <- 1
# NOTE(review): Var is never used. The textbook effect size divides the
# difference by sigma = sqrt(Var); as written, diff1 is treated as already
# standardized -- confirm this is intended.

# Minimum variability: f = diff * sqrt(1 / (2k))
pwr.anova.test(
  k = k, n = NULL, f = diff1 * sqrt(1 / (2 * k)),
  sig.level = alpha, power = p
)
# Intermediate variability: f = (diff / 2) * sqrt((k + 1) / (3 * (k - 1)))
pwr.anova.test(
  k = k, n = NULL, f = (diff1 / 2) * sqrt((k + 1) / (3 * (k - 1))),
  sig.level = alpha, power = p
)
# Maximum variability with an even number of populations (k = 4): f = diff / 2
pwr.anova.test(
  k = k, n = NULL, f = diff1 / 2,
  sig.level = alpha, power = p
)

# 1b: same analysis for a maximum mean difference of 0.5
diff2 <- 0.5
# NOTE(review): the report text for 1b says alpha 0.1, but these calls use
# sig.level = 0.05 -- confirm which level is intended.

# Minimum variability: f = diff * sqrt(1 / (2k))
pwr.anova.test(
  k = k, n = NULL, f = diff2 * sqrt(1 / (2 * k)),
  sig.level = 0.05, power = p
)
# Intermediate variability: f = (diff / 2) * sqrt((k + 1) / (3 * (k - 1)))
pwr.anova.test(
  k = k, n = NULL, f = (diff2 / 2) * sqrt((k + 1) / (3 * (k - 1))),
  sig.level = 0.05, power = p
)
# Maximum variability with an even number of populations (k = 4): f = diff / 2
pwr.anova.test(
  k = k, n = NULL, f = diff2 / 2,
  sig.level = 0.05, power = p
)

# Question 2: observed life for each of the four fluid types (6 per type)
pop1 <- c(17.6, 18.9, 16.3, 17.4, 20.1, 21.6)
pop2 <- c(16.9, 15.3, 18.6, 17.1, 19.5, 20.3)
pop3 <- c(21.4, 23.6, 19.4, 18.5, 20.5, 22.3)
pop4 <- c(19.3, 21.1, 16.9, 17.5, 18.3, 19.8)

# Long format: one row per observation, with the fluid type as a factor
data <- data.frame(
  Fluid_Type = factor(rep(1:4, each = 6)),
  Life = c(pop1, pop2, pop3, pop4)
)

# 2a: achieved power of the experiment at a 0.1 significance level
# k and n are derived from the data (4 fluid types, 6 observations each)
# rather than repeated as literals.
# NOTE(review): sd(data$Life) is the overall standard deviation, which also
# contains between-group differences; a within-group estimate may be what is
# intended for the effect size -- confirm.
pwr.anova.test(
  k = nlevels(data$Fluid_Type),
  n = nrow(data) / nlevels(data$Fluid_Type),
  f = 1 / sd(data$Life),
  sig.level = 0.1, power = NULL
)

# 2b: one-way ANOVA of fluid life by fluid type
# (interactive `?aov` help lookup removed; it has no place in a script)
anova_result <- aov(Life ~ Fluid_Type, data = data)
summary(anova_result)

# 2c: diagnostic plots for the fitted ANOVA model
plot(x = anova_result)

# 2d: Tukey pairwise comparisons at a 90% family-wise confidence level
# (interactive `?TukeyHSD` help lookup removed; it has no place in a script)
# Compute the comparison once and reuse it for both the table and the plot.
tukey_result <- TukeyHSD(anova_result, conf.level = 0.9)
print(tukey_result)
plot(tukey_result)