Introduction


ANOVA for repeated measurements


ANOVA for repeated measurements: A demo


Loading required packages

library(readxl)
library(ExpDes)
library(tidyverse)
library(ggstatsplot)
library(patchwork)
library(emmeans)
library(multcomp)
library(rstatix)

Loading and preparing the data for analysis

agrondata <- read_excel("agrondata.xlsx")

#Subset the agrondata to include only height measurements
HT <- agrondata |> 
  select(treatment, block, HT15, HT30, HT45, HT60)
HT
## # A tibble: 15 × 6
##    treatment block  HT15  HT30  HT45  HT60
##        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1         1     1  56.2 102.  152.  210. 
##  2         1     2  53.9  92.9 140   194. 
##  3         1     3  53.4  90.4 134.  189. 
##  4         1     4  52.2  89   129.  183. 
##  5         1     5  54.6 102.  150.  207. 
##  6         2     1  47.3  73.5 131.  173. 
##  7         2     2  44.4  72.4 119.  159. 
##  8         2     3  36.6  70.1 118.  158. 
##  9         2     4  38.2  68.8 119.  148. 
## 10         2     5  46.5  74.8 133.  174. 
## 11         3     1  24.4  38.7  50.4  74.2
## 12         3     2  21.9  36.6  45.9  69.5
## 13         3     3  22.2  37.3  46.7  70  
## 14         3     4  22.3  36    45.6  71.8
## 15         3     5  25.7  39.1  51.4  77.4
#Transforming data from wide format to long format
HT_Long <- HT  |> 
  gather(key = "Time", value = "HT", HT15, HT30, HT45, HT60) |> 
  convert_as_factor(treatment, block, Time) |> 
  dplyr::rename(Treatment = treatment) |> 
  dplyr::mutate(Days = recode(Time,
                       "HT15" = "15",
                       "HT30" = "30",
                       "HT45" = "45",
                       "HT60" = "60"))

HT_Long
## # A tibble: 60 × 5
##    Treatment block Time     HT Days 
##    <fct>     <fct> <fct> <dbl> <fct>
##  1 1         1     HT15   56.2 15   
##  2 1         2     HT15   53.9 15   
##  3 1         3     HT15   53.4 15   
##  4 1         4     HT15   52.2 15   
##  5 1         5     HT15   54.6 15   
##  6 2         1     HT15   47.3 15   
##  7 2         2     HT15   44.4 15   
##  8 2         3     HT15   36.6 15   
##  9 2         4     HT15   38.2 15   
## 10 2         5     HT15   46.5 15   
## # ℹ 50 more rows
#Testing for presence of outliers
HT_Long |> 
  group_by(Treatment, Days) |> 
  identify_outliers(HT)
## [1] Treatment  Days       block      Time       HT         is.outlier is.extreme
## <0 rows> (or 0-length row.names)
#Testing for normality of distribution
HT_Long |> 
  group_by(Treatment, Days) |> 
  shapiro_test(HT)
## # A tibble: 12 × 5
##    Treatment Days  variable statistic      p
##    <fct>     <fct> <chr>        <dbl>  <dbl>
##  1 1         15    HT           0.989 0.975 
##  2 1         30    HT           0.837 0.157 
##  3 1         45    HT           0.930 0.595 
##  4 1         60    HT           0.911 0.476 
##  5 2         15    HT           0.866 0.252 
##  6 2         30    HT           0.961 0.813 
##  7 2         45    HT           0.762 0.0384
##  8 2         60    HT           0.912 0.478 
##  9 3         15    HT           0.839 0.161 
## 10 3         30    HT           0.929 0.592 
## 11 3         45    HT           0.837 0.156 
## 12 3         60    HT           0.921 0.536

Generating the ANOVA: Method 1

rep.aov <- anova_test(
  data = HT_Long, 
  dv = HT,
  wid = block,
  within = c(Treatment, Days))

#To display the ANOVA table
get_anova_table(rep.aov)
## ANOVA Table (type III tests)
## 
##           Effect  DFn   DFd        F        p p<.05   ges
## 1      Treatment 1.04  4.15  915.983 4.91e-06     * 0.969
## 2           Days 1.02  4.09 1289.421 2.80e-06     * 0.979
## 3 Treatment:Days 6.00 24.00  297.420 2.65e-21     * 0.877
#To display the results of sphericity test
rep.aov$`Mauchly's Test for Sphericity` 
##      Effect        W     p p<.05
## 1 Treatment 0.071000 0.019     *
## 2      Days 0.000758 0.003     *

Generating the ANOVA: Method 2

aov.rep1 <- aov(HT ~ Treatment + Treatment:block + Days + Days:Treatment,
               data = HT_Long)

anova(aov.rep1)
## Analysis of Variance Table
## 
## Response: HT
##                 Df Sum Sq Mean Sq   F value    Pr(>F)    
## Treatment        2  61993 30996.3 2347.9997 < 2.2e-16 ***
## Days             3  91141 30380.3 2301.3391 < 2.2e-16 ***
## Treatment:block 12   1524   127.0    9.6226 5.516e-08 ***
## Treatment:Days   6  14314  2385.6  180.7149 < 2.2e-16 ***
## Residuals       36    475    13.2                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Extract the Mean Square for Blk:Var and use this as MSE(a)
MSE_a <- anova(aov.rep1)["Mean Sq"][3,1]
MS_Trt <- anova(aov.rep1)["Mean Sq"][1,1]
print(c(MSE_a, MS_Trt))
## [1]   127.0289 30996.2702

\[ \begin{aligned} F_{Treatment} &= \frac{MS_{Treatment}}{MSE(a)} \notag \\ &= 244.01\notag \\ p-value &= 2\times 10^{-8} \end{aligned} \]


Data Visualization

#Generate summary statistics for all treatment combinations
DT.means <- emmeans(aov.rep1, specs=c("Days", "Treatment"))
DT.means#Print the table of means
##  Days Treatment emmean   SE df lower.CL upper.CL
##  15   1           54.1 1.62 36     50.8     57.4
##  30   1           95.2 1.62 36     91.9     98.5
##  45   1          141.1 1.62 36    137.8    144.4
##  60   1          196.3 1.62 36    193.0    199.6
##  15   2           42.6 1.62 36     39.3     45.9
##  30   2           71.9 1.62 36     68.6     75.2
##  45   2          123.9 1.62 36    120.6    127.2
##  60   2          162.6 1.62 36    159.3    165.9
##  15   3           23.3 1.62 36     20.0     26.6
##  30   3           37.5 1.62 36     34.2     40.8
##  45   3           48.0 1.62 36     44.7     51.3
##  60   3           72.6 1.62 36     69.3     75.9
## 
## Results are averaged over the levels of: block 
## Confidence level used: 0.95
#Assign the letters to the treatment means
DT.means.cld <- cld(DT.means,
                   Letters=letters,
                   decreasing = TRUE) |> 
  dplyr::arrange(Treatment, Days)

DT.means.cld#Print the table of means
##  Days Treatment emmean   SE df lower.CL upper.CL .group     
##  15   1           54.1 1.62 36     50.8     57.4        g   
##  30   1           95.2 1.62 36     91.9     98.5      e     
##  45   1          141.1 1.62 36    137.8    144.4    c       
##  60   1          196.3 1.62 36    193.0    199.6  a         
##  15   2           42.6 1.62 36     39.3     45.9         hi 
##  30   2           71.9 1.62 36     68.6     75.2       f    
##  45   2          123.9 1.62 36    120.6    127.2     d      
##  60   2          162.6 1.62 36    159.3    165.9   b        
##  15   3           23.3 1.62 36     20.0     26.6           j
##  30   3           37.5 1.62 36     34.2     40.8          i 
##  45   3           48.0 1.62 36     44.7     51.3        gh  
##  60   3           72.6 1.62 36     69.3     75.9       f    
## 
## Results are averaged over the levels of: block 
## Confidence level used: 0.95 
## P value adjustment: tukey method for comparing a family of 12 estimates 
## significance level used: alpha = 0.05 
## NOTE: If two or more means share the same grouping symbol,
##       then we cannot show them to be different.
##       But we also did not show them to be the same.
HT_Long |> 
  group_by(Days, Treatment) |> 
  dplyr::summarize(Mean = mean(HT),
            SD = sd(HT),
            n = length(HT)) |>
  dplyr::mutate(SE = SD/sqrt(n)) |> 
  ggplot(aes(x = Days, y = Mean, group=Treatment)) +
  geom_line(aes(color = Treatment)) +
  geom_point() +
  geom_errorbar(aes(ymin = Mean - SE, 
                    ymax = Mean + SE,
                    colour = Treatment),
                width = 0.2, 
                size = 1) +
  labs(y = "Mean Height") +
  theme_classic() +
  theme(legend.position="bottom")