Two-way ANOVA with repeated measures Practice

Importing Libraries

library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.4.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.4.3

library(rstatix)

## Warning: package 'rstatix' was built under R version 4.4.3

## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(broom)

Reading Data

# Wide format
data("selfesteem2", package = "datarium")
selfesteem2

## # A tibble: 24 × 5
##    id    treatment    t1    t2    t3
##    <fct> <fct>     <dbl> <dbl> <dbl>
##  1 1     ctr          83    77    69
##  2 2     ctr          97    95    88
##  3 3     ctr          93    92    89
##  4 4     ctr          92    92    89
##  5 5     ctr          77    73    68
##  6 6     ctr          72    65    63
##  7 7     ctr          92    89    79
##  8 8     ctr          92    87    81
##  9 9     ctr          95    91    84
## 10 10    ctr          92    84    81
## # ℹ 14 more rows

selfesteem2 is a built-in data set that comes with the datarium package. It contains repeated measures of self-esteem scores across three time points (t1, t2, and t3) for individuals in different treatment groups.

Transform to long format

# Gather the columns t1, t2 and t3 into long format.
# Convert id and time into factor variables
selfesteem2 <- selfesteem2 %>%
  gather(key = "time", value = "score", t1, t2, t3) %>%
  convert_as_factor(id, time)

key parameter would indicate the column name containing the variable names (t1, t2, t3). value parameter would indicate the column name containing the values from the grouped columns

# Inspect some random rows of the data by groups
selfesteem2 %>% sample_n_by(treatment, time, size = 1)

## # A tibble: 6 × 4
##   id    treatment time  score
##   <fct> <fct>     <fct> <dbl>
## 1 4     ctr       t1       92
## 2 5     ctr       t2       73
## 3 2     ctr       t3       88
## 4 11    Diet      t1       93
## 5 3     Diet      t2       91
## 6 11    Diet      t3       91

The sample_n_by() function is getting n(size) samples by specified group(treatment, time).

Summary Statistics

selfesteem2 %>%
  group_by(treatment, time) %>%
  get_summary_stats()

## # A tibble: 6 × 15
##   treatment time  variable     n   min   max median    q1    q3   iqr   mad
##   <fct>     <fct> <fct>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ctr       t1    score       12    72    97   92    82    92.2 10.2   2.96
## 2 ctr       t2    score       12    65    95   88    76    92   16     5.93
## 3 ctr       t3    score       12    62    91   81    68.8  88.2 19.5  11.9 
## 4 Diet      t1    score       12    74   100   90    83    91.5  8.5   4.45
## 5 Diet      t2    score       12    75    99   90    84.5  92.2  7.75  5.19
## 6 Diet      t3    score       12    72    97   89.5  84.8  93.5  8.75  6.67
## # ℹ 4 more variables: mean <dbl>, sd <dbl>, se <dbl>, ci <dbl>

Check Assumptions

Normality

selfesteem2 %>%
  group_by(treatment, time) %>%
  shapiro_test(score)

## # A tibble: 6 × 5
##   treatment time  variable statistic      p
##   <fct>     <fct> <chr>        <dbl>  <dbl>
## 1 ctr       t1    score        0.828 0.0200
## 2 ctr       t2    score        0.868 0.0618
## 3 ctr       t3    score        0.887 0.107 
## 4 Diet      t1    score        0.919 0.279 
## 5 Diet      t2    score        0.923 0.316 
## 6 Diet      t3    score        0.886 0.104

All groups appear to be normally distributed (> 0.05) from the Shapiro results except for the control treatment in t1.

ggqqplot(selfesteem2, "score", ggtheme = theme_bw()) +
  facet_grid(time ~ treatment, labeller = "label_both")

# Gets the mean and standard deviation per group (treatment x time)
group_stats <- selfesteem2 %>%
  group_by(treatment, time) %>%
  summarise(
    mean = mean(score, na.rm = TRUE),
    sd = sd(score, na.rm = TRUE),
    .groups = "drop"
  )
group_stats

## # A tibble: 6 × 4
##   treatment time   mean    sd
##   <fct>     <fct> <dbl> <dbl>
## 1 ctr       t1     88    8.08
## 2 ctr       t2     83.8 10.2 
## 3 ctr       t3     78.7 10.5 
## 4 Diet      t1     87.6  7.62
## 5 Diet      t2     87.8  7.42
## 6 Diet      t3     87.7  8.14

# Gets the x and y values for the normal curve
curve_data <- group_stats %>%
  rowwise() %>% # executes per row
  mutate(x = list(seq(mean - 4 * sd, mean + 4 * sd, length.out = 100))) %>%
  unnest(x) %>%
  mutate(y = dnorm(x, mean, sd))
curve_data

## # A tibble: 600 × 6
##    treatment time   mean    sd     x         y
##    <fct>     <fct> <dbl> <dbl> <dbl>     <dbl>
##  1 ctr       t1       88  8.08  55.7 0.0000166
##  2 ctr       t1       88  8.08  56.3 0.0000228
##  3 ctr       t1       88  8.08  57.0 0.0000312
##  4 ctr       t1       88  8.08  57.6 0.0000424
##  5 ctr       t1       88  8.08  58.3 0.0000573
##  6 ctr       t1       88  8.08  58.9 0.0000768
##  7 ctr       t1       88  8.08  59.6 0.000102 
##  8 ctr       t1       88  8.08  60.3 0.000136 
##  9 ctr       t1       88  8.08  60.9 0.000178 
## 10 ctr       t1       88  8.08  61.6 0.000233 
## # ℹ 590 more rows

ggplot(selfesteem2, aes(x = score)) +
  geom_histogram(aes(y = ..density..), fill = "skyblue", color = "black") +
  geom_line(data = curve_data, aes(x = x, y = y), color = "black", size = 1) +
  facet_grid(time ~ treatment, labeller = label_both) +
  theme_bw() +
  labs(y = "Density", x = "Score")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Homogeneity

selfesteem2 %>%
  mutate(group = interaction(treatment, time)) %>%
  levene_test(score ~ group)

## # A tibble: 1 × 4
##     df1   df2 statistic     p
##   <int> <int>     <dbl> <dbl>
## 1     5    66     0.536 0.749

Since the p-value > 0.05, this implies that the groups have homogenuous variances.

Sphericity

anova_test(
  data = selfesteem2, dv = score, wid = id,
  within = c(treatment, time)
)

## ANOVA Table (type III tests)
## 
## $ANOVA
##           Effect DFn DFd      F        p p<.05   ges
## 1      treatment   1  11 15.541 2.00e-03     * 0.059
## 2           time   2  22 27.369 1.08e-06     * 0.049
## 3 treatment:time   2  22 30.424 4.63e-07     * 0.050
## 
## $`Mauchly's Test for Sphericity`
##           Effect     W     p p<.05
## 1           time 0.469 0.023     *
## 2 treatment:time 0.616 0.089      
## 
## $`Sphericity Corrections`
##           Effect   GGe      DF[GG]    p[GG] p[GG]<.05   HFe      DF[HF]
## 1           time 0.653 1.31, 14.37 5.03e-05         * 0.705 1.41, 15.52
## 2 treatment:time 0.723  1.45, 15.9 1.25e-05         * 0.803 1.61, 17.66
##      p[HF] p[HF]<.05
## 1 2.81e-05         *
## 2 4.82e-06         *

From the ANOVA results

Treatment: There is a significant main effect of treatment on score (p = .002). This suggests that across time points, the different treatment groups had significantly different scores.

Time: Significant main effect (p < .001), meaning the scores change significantly over time, regardless of treatment.

Treatment × Time interaction: Also significant (p < .001). This means that the pattern of change over time differs between treatment groups.

From Mauchly’s Test for Sphericity

The p-value for the treatment:time (> 0.05) indicates that sphericity is met for this interaction.

Post-hoc tests

A significant two-way interaction indicates that the impact that one factor (e.g., treatment) has on the outcome variable (e.g., self-esteem score) depends on the level of the other factor (e.g., time) (and vice versa). So, you can decompose a significant two-way interaction into:

Simple main effect: run one-way model of the first variable (factor A) at each level of the second variable (factor B),
Simple pairwise comparisons: if the simple main effect is significant, run multiple pairwise comparisons to determine which groups are different.

For a non-significant two-way interaction, you need to determine whether you have any statistically significant main effects from the ANOVA output.

Reference: Repeated Measures ANOVA in R: The Ultimate Guide - Datanovia

Procedure for a significant two-way interaction

Effect of treatment. In this example, we’ll analyze the effect of treatment on self-esteem score at every time point.

# Effect of treatment at each time point
one.way <- selfesteem2 %>%
  group_by(time) %>%
  anova_test(dv = score, wid = id, within = treatment) %>%
  get_anova_table() %>%
  adjust_pvalue(method = "bonferroni")
one.way

## # A tibble: 3 × 9
##   time  Effect      DFn   DFd      F       p `p<.05`      ges   p.adj
##   <fct> <chr>     <dbl> <dbl>  <dbl>   <dbl> <chr>      <dbl>   <dbl>
## 1 t1    treatment     1    11  0.376 0.552   ""      0.000767 1      
## 2 t2    treatment     1    11  9.03  0.012   "*"     0.052    0.036  
## 3 t3    treatment     1    11 30.9   0.00017 "*"     0.199    0.00051

# Pairwise comparisons between treatment groups
pwc <- selfesteem2 %>%
  group_by(time) %>%
  pairwise_t_test(
    score ~ treatment, paired = TRUE,
    p.adjust.method = "bonferroni"
    )
pwc

## # A tibble: 3 × 11
##   time  .y.   group1 group2    n1    n2 statistic    df       p   p.adj
## * <fct> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>   <dbl>   <dbl>
## 1 t1    score ctr    Diet      12    12     0.613    11 0.552   0.552  
## 2 t2    score ctr    Diet      12    12    -3.00     11 0.012   0.012  
## 3 t3    score ctr    Diet      12    12    -5.56     11 0.00017 0.00017
## # ℹ 1 more variable: p.adj.signif <chr>

Considering the Bonferroni adjusted p-value (p.adj), it can be seen that the simple main effect of treatment was not significant at the time point t1 (p = 1). It becomes significant at t2 (p = 0.036) and t3 (p = 0.00051).

Pairwise comparisons show that the mean self-esteem score was significantly different between ctr and Diet group at t2 (p = 0.12) and t3 (p = 0.00017) but not at t1 (p = 0.55).

Effect of time

# Effect of time at each level of treatment
one.way2 <- selfesteem2 %>%
  group_by(treatment) %>%
  anova_test(dv = score, wid = id, within = time) %>%
  get_anova_table() %>%
  adjust_pvalue(method = "bonferroni")
one.way2

## # A tibble: 2 × 9
##   treatment Effect   DFn   DFd      F          p `p<.05`      ges     p.adj
##   <fct>     <chr>  <dbl> <dbl>  <dbl>      <dbl> <chr>      <dbl>     <dbl>
## 1 ctr       time       2    22 39.7   0.00000005 "*"     0.145    0.0000001
## 2 Diet      time       2    22  0.078 0.925      ""      0.000197 1

# Pairwise comparisons between time points
pwc2 <- selfesteem2 %>%
  group_by(treatment) %>%
  pairwise_t_test(
    score ~ time, paired = TRUE,
    p.adjust.method = "bonferroni"
    )
pwc2

## # A tibble: 6 × 11
##   treatment .y.   group1 group2    n1    n2 statistic    df         p     p.adj
## * <fct>     <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>     <dbl>     <dbl>
## 1 ctr       score t1     t2        12    12     4.53     11 0.000858  0.003    
## 2 ctr       score t1     t3        12    12     6.91     11 0.0000255 0.0000765
## 3 ctr       score t2     t3        12    12     6.49     11 0.0000449 0.000135 
## 4 Diet      score t1     t2        12    12    -0.522    11 0.612     1        
## 5 Diet      score t1     t3        12    12    -0.102    11 0.921     1        
## 6 Diet      score t2     t3        12    12     0.283    11 0.782     1        
## # ℹ 1 more variable: p.adj.signif <chr>

The effect of time is significant only for the control trial, F(2, 22) = 39.7, p < 0.0001. Pairwise comparisons show that all comparisons between time points were statistically significant for control trial.

Procedure for non-significant two-way interaction

If the interaction is not significant, you need to interpret the main effects for each of the two variables: treatment and time. A significant main effect can be followed up with pairwise comparisons.

selfesteem2 %>%
  pairwise_t_test(
    score ~ treatment, paired = TRUE,
    p.adjust.method = "bonferroni"
  )

## # A tibble: 1 × 10
##   .y.   group1 group2    n1    n2 statistic    df        p    p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>    <dbl>    <dbl> <chr>       
## 1 score ctr    Diet      36    36     -4.35    35 0.000113 0.000113 ***

There is a highly significant difference in score between the ctr and Diet treatment conditions. The negative t-statistic (-4.35) means that scores in the Diet condition were higher than in the ctr condition

selfesteem2 %>%
  pairwise_t_test(
    score ~ time, paired = TRUE,
    p.adjust.method = "bonferroni"
  )

## # A tibble: 3 × 10
##   .y.   group1 group2    n1    n2 statistic    df     p p.adj p.adj.signif
## * <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <dbl> <chr>       
## 1 score t1     t2        24    24      2.86    23 0.009 0.027 *           
## 2 score t1     t3        24    24      3.70    23 0.001 0.004 **          
## 3 score t2     t3        24    24      3.75    23 0.001 0.003 **

There are significant differences in score across all time points. Since this is a paired test, it assumes the same individuals were measured at t1, t2, and t3.