This experiment investigates whether different foodstuffs (A, B, C, and D) produce different average weights.
The experiment follows a Completely Randomized Design (CRD) where foodstuff type is the treatment factor and weight is the response variable.
Weight <- c(
55,49,42,21,52,
61,112,30,89,63,
42,97,81,95,92,
169,137,169,85,154
)
Foodstuff <- factor(c(
rep("A",5),
rep("B",5),
rep("C",5),
rep("D",5)
))
food <- data.frame(Foodstuff, Weight)
food
## Foodstuff Weight
## 1 A 55
## 2 A 49
## 3 A 42
## 4 A 21
## 5 A 52
## 6 B 61
## 7 B 112
## 8 B 30
## 9 B 89
## 10 B 63
## 11 C 42
## 12 C 97
## 13 C 81
## 14 C 95
## 15 C 92
## 16 D 169
## 17 D 137
## 18 D 169
## 19 D 85
## 20 D 154
str(food)
## 'data.frame': 20 obs. of 2 variables:
## $ Foodstuff: Factor w/ 4 levels "A","B","C","D": 1 1 1 1 1 2 2 2 2 2 ...
## $ Weight : num 55 49 42 21 52 61 112 30 89 63 ...
table(food$Foodstuff)
##
## A B C D
## 5 5 5 5
aggregate(Weight ~ Foodstuff,
data = food,
mean)
## Foodstuff Weight
## 1 A 43.8
## 2 B 71.0
## 3 C 81.4
## 4 D 142.8
aggregate(Weight ~ Foodstuff,
data = food,
sd)
## Foodstuff Weight
## 1 A 13.62718
## 2 B 31.02418
## 3 C 22.87575
## 4 D 34.90272
aggregate(Weight ~ Foodstuff,
data = food,
summary)
## Foodstuff Weight.Min. Weight.1st Qu. Weight.Median Weight.Mean Weight.3rd Qu.
## 1 A 21.0 42.0 49.0 43.8 52.0
## 2 B 30.0 61.0 63.0 71.0 89.0
## 3 C 42.0 81.0 92.0 81.4 95.0
## 4 D 85.0 137.0 154.0 142.8 169.0
## Weight.Max.
## 1 55.0
## 2 112.0
## 3 97.0
## 4 169.0
boxplot(Weight ~ Foodstuff,
data = food,
col = c("lightblue","lightgreen",
"lightyellow","pink"),
main = "Weight by Foodstuff",
xlab = "Foodstuff",
ylab = "Weight")
stripchart(Weight ~ Foodstuff,
data = food,
vertical = TRUE,
method = "jitter",
pch = 19,
col = "blue")
means <- aggregate(Weight ~ Foodstuff,
data = food,
mean)
barplot(means$Weight,
names.arg = means$Foodstuff,
ylab = "Mean Weight",
main = "Mean Weight by Foodstuff")
The CRD model is
\[ Y_{ij} = \mu + \tau_i + \epsilon_{ij} \]
where
\[ H_0:\mu_A=\mu_B=\mu_C=\mu_D \]
\[ H_a:\text{At least one mean differs} \]
model <- aov(Weight ~ Foodstuff,
data = food)
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## Foodstuff 3 26235 8745 12.11 0.000218 ***
## Residuals 16 11559 722
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model)
## Analysis of Variance Table
##
## Response: Weight
## Df Sum Sq Mean Sq F value Pr(>F)
## Foodstuff 3 26235 8745.0 12.105 0.000218 ***
## Residuals 16 11559 722.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.tables(model,
type = "means")
## Tables of means
## Grand mean
##
## 84.75
##
## Foodstuff
## Foodstuff
## A B C D
## 43.8 71.0 81.4 142.8
TukeyHSD(model)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Weight ~ Foodstuff, data = food)
##
## $Foodstuff
## diff lwr upr p adj
## B-A 27.2 -21.43481 75.83481 0.4061728
## C-A 37.6 -11.03481 86.23481 0.1621713
## D-A 99.0 50.36519 147.63481 0.0001380
## C-B 10.4 -38.23481 59.03481 0.9268359
## D-B 71.8 23.16519 120.43481 0.0032490
## D-C 61.4 12.76519 110.03481 0.0112771
tapply(food$Weight,
food$Foodstuff,
mean)
## A B C D
## 43.8 71.0 81.4 142.8
par(mfrow=c(2,2))
plot(model)
hist(residuals(model),
main="Histogram of Residuals",
xlab="Residuals")
qqnorm(residuals(model))
qqline(residuals(model))
shapiro.test(residuals(model))
##
## Shapiro-Wilk normality test
##
## data: residuals(model)
## W = 0.93614, p-value = 0.2025
Interpretation:
bartlett.test(Weight ~ Foodstuff,
data = food)
##
## Bartlett test of homogeneity of variances
##
## data: Weight by Foodstuff
## Bartlett's K-squared = 3.1571, df = 3, p-value = 0.368
Interpretation:
grand.mean <- mean(food$Weight)
grand.mean
## [1] 84.75
SST <- sum((food$Weight - grand.mean)^2)
SST
## [1] 37793.75
means <- tapply(food$Weight,
food$Foodstuff,
mean)
r <- 5
SSTR <- sum(r*(means-grand.mean)^2)
SSTR
## [1] 26234.95
SSE <- SST - SSTR
SSE
## [1] 11558.8
df.treatment <- 4 - 1
df.error <- 20 - 4
c(df.treatment, df.error)
## [1] 3 16
MST <- SSTR/df.treatment
MSE <- SSE/df.error
c(MST,MSE)
## [1] 8744.983 722.425
F.value <- MST/MSE
F.value
## [1] 12.10504
pf(F.value,
df.treatment,
df.error,
lower.tail = FALSE)
## [1] 0.0002180248
aggregate(Weight ~ Foodstuff,
data=food,
function(x)
c(
Mean=mean(x),
SD=sd(x),
Min=min(x),
Max=max(x)
))
## Foodstuff Weight.Mean Weight.SD Weight.Min Weight.Max
## 1 A 43.80000 13.62718 21.00000 55.00000
## 2 B 71.00000 31.02418 30.00000 112.00000
## 3 C 81.40000 22.87575 42.00000 97.00000
## 4 D 142.80000 34.90272 85.00000 169.00000
Interpret the ANOVA results:
Interpret Tukey’s test to identify which foodstuffs differ significantly.
Based on the analyses:
sessionInfo()
## R version 4.3.3 (2024-02-29 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: Africa/Nairobi
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.37 R6_2.6.1 fastmap_1.2.0 xfun_0.52
## [5] cachem_1.1.0 knitr_1.51 htmltools_0.5.8.1 rmarkdown_2.31
## [9] lifecycle_1.0.4 cli_3.6.2 sass_0.4.9 jquerylib_0.1.4
## [13] compiler_4.3.3 rstudioapi_0.17.1 tools_4.3.3 evaluate_1.0.4
## [17] bslib_0.9.0 yaml_2.3.10 rlang_1.1.5 jsonlite_2.0.0