# Use the built-in PlantGrowth data set and preview 10 randomly sampled rows
data <- PlantGrowth
set.seed(123)
dplyr::sample_n(data, 10)
## weight group
## 1 5.87 trt1
## 2 4.32 trt1
## 3 3.59 trt1
## 4 5.18 ctrl
## 5 5.14 ctrl
## 6 4.89 trt1
## 7 5.12 trt2
## 8 4.81 trt1
## 9 4.50 ctrl
## 10 4.69 trt1
levels(data$group)
## [1] "ctrl" "trt1" "trt2"
data$group <- ordered(data$group,
levels = c("ctrl", "trt1", "trt2"))
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
group_by(data, group) %>%
summarise(
count = n(),
mean = mean(weight, na.rm = TRUE),
sd = sd(weight, na.rm = TRUE)
)
## # A tibble: 3 × 4
## group count mean sd
## <ord> <int> <dbl> <dbl>
## 1 ctrl 10 5.03 0.583
## 2 trt1 10 4.66 0.794
## 3 trt2 10 5.53 0.443
library("ggpubr")
## Loading required package: ggplot2
ggboxplot(data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
order = c("ctrl", "trt1", "trt2"),
ylab = "Weight", xlab = "Treatment")
library("ggpubr")
ggline(data, x = "group", y = "weight",
add = c("mean_se", "jitter"),
order = c("ctrl", "trt1", "trt2"),
ylab = "Weight", xlab = "Treatment")
boxplot(weight ~ group, data = data,
xlab = "Treatment", ylab = "Weight",
frame = FALSE, col = c("#00AFBB", "#E7B800", "#FC4E07"))
library("gplots")
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
plotmeans(weight ~ group, data = data,
          xlab = "Treatment", ylab = "Weight",
          main = "Mean Plot with 95% CI")
# Calculate the one-way ANOVA test
res.aov <- aov(weight ~ group, data = data)
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 3.766 1.8832 4.846 0.0159 *
## Residuals 27 10.492 0.3886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Because the p-value (0.0159) is less than the 0.05 significance level, we can conclude that there are significant differences between the groups, as highlighted with “*” in the model summary.
In a one-way ANOVA, a significant p-value tells us that at least some of the group means differ, but not which pairs of groups differ.
Multiple pairwise comparisons can be performed to determine whether the mean differences between specific pairs of groups are statistically significant.
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = weight ~ group, data = data)
##
## $group
## diff lwr upr p adj
## trt1-ctrl -0.371 -1.0622161 0.3202161 0.3908711
## trt2-ctrl 0.494 -0.1972161 1.1852161 0.1979960
## trt2-trt1 0.865 0.1737839 1.5562161 0.0120064
The output shows that only the difference between trt2 and trt1 is significant, with an adjusted p-value of 0.012.
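For a quick visual summary of these comparisons (an optional sketch, not part of the original analysis), the object returned by TukeyHSD() can be passed to base R's plot() method:
# Plot the 95% family-wise confidence intervals for each pairwise difference;
# intervals that do not cross zero correspond to significant comparisons
plot(TukeyHSD(res.aov), las = 1)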
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
summary(glht(res.aov, linfct = mcp(group = "Tukey")))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = weight ~ group, data = data)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## trt1 - ctrl == 0 -0.3710 0.2788 -1.331 0.3908
## trt2 - ctrl == 0 0.4940 0.2788 1.772 0.1979
## trt2 - trt1 == 0 0.8650 0.2788 3.103 0.0121 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
pairwise.t.test(data$weight, data$group)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: data$weight and data$group
##
## ctrl trt1
## trt1 0.194 -
## trt2 0.175 0.013
##
## P value adjustment method: holm
# Check the homogeneity of variances assumption: residuals vs. fitted plot
plot(res.aov, 1)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
leveneTest(weight ~ group, data = data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 1.1192 0.3412
## 27
As seen in the output above, the p-value (0.34) is greater than the significance level of 0.05, so there is no evidence that the variance across groups differs significantly.
As a result, we can assume that the variances in the different treatment groups are homogeneous.
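As an optional cross-check (a minimal sketch using base R, not part of the original workflow), Bartlett's test offers another way to assess homogeneity of variances on the same data:
# Bartlett's test of homogeneity of variances across the treatment groups
bartlett.test(weight ~ group, data = data)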
ANOVA test with no assumption of equal variances
oneway.test(weight ~ group, data = data)
##
## One-way analysis of means (not assuming equal variances)
##
## data: weight and group
## F = 5.181, num df = 2.000, denom df = 17.128, p-value = 0.01739
pairwise.t.test(data$weight, data$group,
p.adjust.method = "BH", pool.sd = FALSE)
##
## Pairwise comparisons using t tests with non-pooled SD
##
## data: data$weight and data$group
##
## ctrl trt1
## trt1 0.250 -
## trt2 0.072 0.028
##
## P value adjustment method: BH
# Check the normality assumption: normal Q-Q plot of the residuals
plot(res.aov, 2)
All of the points fall approximately along the reference line in the normal Q-Q plot above, so we can assume normality.
aov_residuals <- residuals(object = res.aov)
# Run the Shapiro-Wilk test on the ANOVA residuals
shapiro.test(x = aov_residuals)
##
## Shapiro-Wilk normality test
##
## data: aov_residuals
## W = 0.96607, p-value = 0.4379
The Shapiro-Wilk test on the ANOVA residuals (W = 0.97, p = 0.44) finds no evidence that normality is violated, which supports the previous conclusion.
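For a ggplot2-based alternative to the base Q-Q plot (a brief sketch, assuming ggpubr is still loaded from the plotting step above), the residuals can also be inspected with ggqqplot():
# Q-Q plot of the ANOVA residuals with ggpubr
ggqqplot(aov_residuals)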
Non-parametric alternative to the one-way ANOVA test
The Kruskal-Wallis rank-sum test is a non-parametric alternative to one-way ANOVA that can be employed when the ANOVA assumptions are not met.
kruskal.test(weight ~ group, data = data)
##
## Kruskal-Wallis rank sum test
##
## data: weight by group
## Kruskal-Wallis chi-squared = 7.9882, df = 2, p-value = 0.01842
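Because the Kruskal-Wallis test is significant here, a non-parametric follow-up can identify which pairs of groups differ. The sketch below (an optional addition, not part of the original analysis) uses base R's pairwise Wilcoxon rank-sum tests with Benjamini-Hochberg adjustment:
# Pairwise Wilcoxon rank-sum tests between group levels, with BH correction
pairwise.wilcox.test(data$weight, data$group, p.adjust.method = "BH")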