data <- PlantGrowth
set.seed(123)
dplyr::sample_n(data, 10)
##    weight group
## 1    5.87  trt1
## 2    4.32  trt1
## 3    3.59  trt1
## 4    5.18  ctrl
## 5    5.14  ctrl
## 6    4.89  trt1
## 7    5.12  trt2
## 8    4.81  trt1
## 9    4.50  ctrl
## 10   4.69  trt1
levels(data$group)
## [1] "ctrl" "trt1" "trt2"
data$group <- ordered(data$group,
                         levels = c("ctrl", "trt1", "trt2"))
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
group_by(data, group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )
## # A tibble: 3 × 4
##   group count  mean    sd
##   <ord> <int> <dbl> <dbl>
## 1 ctrl     10  5.03 0.583
## 2 trt1     10  4.66 0.794
## 3 trt2     10  5.53 0.443

Visualize your information

library("ggpubr")
## Loading required package: ggplot2
ggboxplot(data, x = "group", y = "weight",
          color = "group", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
          order = c("ctrl", "trt1", "trt2"),
          ylab = "Weight", xlab = "Treatment")

One-way ANOVA Test in R

library("ggpubr")
ggline(data, x = "group", y = "weight",
       add = c("mean_se", "jitter"),
       order = c("ctrl", "trt1", "trt2"),
       ylab = "Weight", xlab = "Treatment")

R’s one-way ANOVA test

boxplot(weight ~ group, data = data,
        xlab = "Treatment", ylab = "Weight",
        frame = FALSE, col = c("#00AFBB", "#E7B800", "#FC4E07"))

For plot means

library("gplots")
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
plotmeans(weight ~ group, data = data, frame = FALSE,
          xlab = "Treatment", ylab = "Weight",
          main="Mean Plot with 95% CI")
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is not
## a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter

# Calculate the one-way ANOVA test

res.aov <- aov(weight ~ group, data = data)
summary(res.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## group        2  3.766  1.8832   4.846 0.0159 *
## Residuals   27 10.492  0.3886                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Conclusion

We can conclude that there are significant differences between the groups highlighted with “*” in the model summary because the p-value is less than the significance level of 0.05.

Multiple pairwise comparisons between group means

A significant p-value implies that some of the group means are different in a one-way ANOVA test, but we don’t know which pairs of groups are different.

Multiple pairwise comparisons can be performed to see if the mean differences between certain pairs of groups are statistically significant.

Multiple Tukey pairwise comparisons

TukeyHSD(res.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = weight ~ group, data = data)
## 
## $group
##             diff        lwr       upr     p adj
## trt1-ctrl -0.371 -1.0622161 0.3202161 0.3908711
## trt2-ctrl  0.494 -0.1972161 1.1852161 0.1979960
## trt2-trt1  0.865  0.1737839 1.5562161 0.0120064

It can be observe that only the difference between trt2 and trt1 is significant, as shown by the output, with an adjusted p-value of 0.012.

library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
summary(glht(res.aov, linfct = mcp(group = "Tukey")))
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = weight ~ group, data = data)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)  
## trt1 - ctrl == 0  -0.3710     0.2788  -1.331   0.3908  
## trt2 - ctrl == 0   0.4940     0.2788   1.772   0.1979  
## trt2 - trt1 == 0   0.8650     0.2788   3.103   0.0121 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

T-test with pairs

pairwise.t.test(data$weight, data$group)
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  data$weight and data$group 
## 
##      ctrl  trt1 
## trt1 0.194 -    
## trt2 0.175 0.013
## 
## P value adjustment method: holm

Examine the assumption of homogeneity of variance.

1: Variance homogeneity

plot(res.aov, 1)

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
leveneTest(weight ~ group, data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  1.1192 0.3412
##       27

The p-value is not less than the significance level of 0.05, as seen in the output above. This indicates that there is no indication that the variance across groups is statistically significant.

As a result, we can infer that the variations in the different treatment groups are homogeneous.

Relaxing the premise of homogeneity of variance

ANOVA test with no equal variance assumption

oneway.test(weight ~ group, data = data)
## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  weight and group
## F = 5.181, num df = 2.000, denom df = 17.128, p-value = 0.01739
pairwise.t.test(data$weight, data$group,
                 p.adjust.method = "BH", pool.sd = FALSE)
## 
##  Pairwise comparisons using t tests with non-pooled SD 
## 
## data:  data$weight and data$group 
## 
##      ctrl  trt1 
## trt1 0.250 -    
## trt2 0.072 0.028
## 
## P value adjustment method: BH

Examine the presumption of normality.

2: Normality

plot(res.aov, 2)

We can infer normality because all of the points lie roughly along this reference line.

Extract the residuals

aov_residuals <- residuals(object = res.aov )

Run Shapiro-Wilk test

shapiro.test(x = aov_residuals )
## 
##  Shapiro-Wilk normality test
## 
## data:  aov_residuals
## W = 0.96607, p-value = 0.4379

The Shapiro-Wilk test on the ANOVA residuals (W = 0.96, p = 0.43), which finds no evidence of normality violation, supports the previous conclusion.

ANOVA test with a non-parametric alternative

The Kruskal-Wallis rank-sum test is a non-parametric alternative to one-way ANOVA that can be employed when the ANOVA assumptions are not met.

kruskal.test(weight ~ group, data = data)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  weight by group
## Kruskal-Wallis chi-squared = 7.9882, df = 2, p-value = 0.01842