In a repeated measures design the same variable is measured more than once on the same unit (plant, animal, student)
It’s simplest form is: pre-test and post-test design
The resulting data for each unit constitute a time series (because of autocorrelation)
The analysis must be able to answer one of the major research questions:
The usual methods of statistical analysis, such as the analysis of variance, must be modified or replaced by more suitable ones to take account of the possible correlation
We shall demonstrate here two (2) ways to analyze data from experiments with repeated measures
We shall use the agrondata in this demonstration since it contains repeated measurement on plant height.
The data on plant height is in wide format, hence, it has to be converted to long format
library(readxl)
library(ExpDes)
library(tidyverse)
library(ggstatsplot)
library(patchwork)
library(emmeans)
library(multcomp)
library(rstatix)
agrondata <- read_excel("agrondata.xlsx")
#Subset the agrondata to include only height measurements
HT <- agrondata |>
select(treatment, block, HT15, HT30, HT45, HT60)
HT
## # A tibble: 15 × 6
## treatment block HT15 HT30 HT45 HT60
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 56.2 102. 152. 210.
## 2 1 2 53.9 92.9 140 194.
## 3 1 3 53.4 90.4 134. 189.
## 4 1 4 52.2 89 129. 183.
## 5 1 5 54.6 102. 150. 207.
## 6 2 1 47.3 73.5 131. 173.
## 7 2 2 44.4 72.4 119. 159.
## 8 2 3 36.6 70.1 118. 158.
## 9 2 4 38.2 68.8 119. 148.
## 10 2 5 46.5 74.8 133. 174.
## 11 3 1 24.4 38.7 50.4 74.2
## 12 3 2 21.9 36.6 45.9 69.5
## 13 3 3 22.2 37.3 46.7 70
## 14 3 4 22.3 36 45.6 71.8
## 15 3 5 25.7 39.1 51.4 77.4
#Transforming data from wide format to long format
HT_Long <- HT |>
gather(key = "Time", value = "HT", HT15, HT30, HT45, HT60) |>
convert_as_factor(treatment, block, Time) |>
dplyr::rename(Treatment = treatment) |>
dplyr::mutate(Days = recode(Time,
"HT15" = "15",
"HT30" = "30",
"HT45" = "45",
"HT60" = "60"))
HT_Long
## # A tibble: 60 × 5
## Treatment block Time HT Days
## <fct> <fct> <fct> <dbl> <fct>
## 1 1 1 HT15 56.2 15
## 2 1 2 HT15 53.9 15
## 3 1 3 HT15 53.4 15
## 4 1 4 HT15 52.2 15
## 5 1 5 HT15 54.6 15
## 6 2 1 HT15 47.3 15
## 7 2 2 HT15 44.4 15
## 8 2 3 HT15 36.6 15
## 9 2 4 HT15 38.2 15
## 10 2 5 HT15 46.5 15
## # ℹ 50 more rows
#Testing for presence of outliers
HT_Long |>
group_by(Treatment, Days) |>
identify_outliers(HT)
## [1] Treatment Days block Time HT is.outlier is.extreme
## <0 rows> (or 0-length row.names)
#Testing for normality of distribution
HT_Long |>
group_by(Treatment, Days) |>
shapiro_test(HT)
## # A tibble: 12 × 5
## Treatment Days variable statistic p
## <fct> <fct> <chr> <dbl> <dbl>
## 1 1 15 HT 0.989 0.975
## 2 1 30 HT 0.837 0.157
## 3 1 45 HT 0.930 0.595
## 4 1 60 HT 0.911 0.476
## 5 2 15 HT 0.866 0.252
## 6 2 30 HT 0.961 0.813
## 7 2 45 HT 0.762 0.0384
## 8 2 60 HT 0.912 0.478
## 9 3 15 HT 0.839 0.161
## 10 3 30 HT 0.929 0.592
## 11 3 45 HT 0.837 0.156
## 12 3 60 HT 0.921 0.536
rep.aov <- anova_test(
data = HT_Long,
dv = HT,
wid = block,
within = c(Treatment, Days))
#To display the ANOVA table
get_anova_table(rep.aov)
## ANOVA Table (type III tests)
##
## Effect DFn DFd F p p<.05 ges
## 1 Treatment 1.04 4.15 915.983 4.91e-06 * 0.969
## 2 Days 1.02 4.09 1289.421 2.80e-06 * 0.979
## 3 Treatment:Days 6.00 24.00 297.420 2.65e-21 * 0.877
#To display the results of sphericity test
rep.aov$`Mauchly's Test for Sphericity`
## Effect W p p<.05
## 1 Treatment 0.071000 0.019 *
## 2 Days 0.000758 0.003 *
aov.rep1 <- aov(HT ~ Treatment + Treatment:block + Days + Days:Treatment,
data = HT_Long)
anova(aov.rep1)
## Analysis of Variance Table
##
## Response: HT
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 61993 30996.3 2347.9997 < 2.2e-16 ***
## Days 3 91141 30380.3 2301.3391 < 2.2e-16 ***
## Treatment:block 12 1524 127.0 9.6226 5.516e-08 ***
## Treatment:Days 6 14314 2385.6 180.7149 < 2.2e-16 ***
## Residuals 36 475 13.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Extract the Mean Square for Blk:Var and use this as MSE(a)
MSE_a <- anova(aov.rep1)["Mean Sq"][3,1]
MS_Trt <- anova(aov.rep1)["Mean Sq"][1,1]
print(c(MSE_a, MS_Trt))
## [1] 127.0289 30996.2702
\[ \begin{aligned} F_{Treatment} &= \frac{MS_{Treatment}}{MSE(a)} \notag \\ &= 244.01\notag \\ p-value &= 2\times 10^{-8} \end{aligned} \]
#Generate summary statistics for all treatment combinations
DT.means <- emmeans(aov.rep1, specs=c("Days", "Treatment"))
DT.means#Print the table of means
## Days Treatment emmean SE df lower.CL upper.CL
## 15 1 54.1 1.62 36 50.8 57.4
## 30 1 95.2 1.62 36 91.9 98.5
## 45 1 141.1 1.62 36 137.8 144.4
## 60 1 196.3 1.62 36 193.0 199.6
## 15 2 42.6 1.62 36 39.3 45.9
## 30 2 71.9 1.62 36 68.6 75.2
## 45 2 123.9 1.62 36 120.6 127.2
## 60 2 162.6 1.62 36 159.3 165.9
## 15 3 23.3 1.62 36 20.0 26.6
## 30 3 37.5 1.62 36 34.2 40.8
## 45 3 48.0 1.62 36 44.7 51.3
## 60 3 72.6 1.62 36 69.3 75.9
##
## Results are averaged over the levels of: block
## Confidence level used: 0.95
#Assign the letters to the treatment means
DT.means.cld <- cld(DT.means,
Letters=letters,
decreasing = TRUE) |>
dplyr::arrange(Treatment, Days)
DT.means.cld#Print the table of means
## Days Treatment emmean SE df lower.CL upper.CL .group
## 15 1 54.1 1.62 36 50.8 57.4 g
## 30 1 95.2 1.62 36 91.9 98.5 e
## 45 1 141.1 1.62 36 137.8 144.4 c
## 60 1 196.3 1.62 36 193.0 199.6 a
## 15 2 42.6 1.62 36 39.3 45.9 hi
## 30 2 71.9 1.62 36 68.6 75.2 f
## 45 2 123.9 1.62 36 120.6 127.2 d
## 60 2 162.6 1.62 36 159.3 165.9 b
## 15 3 23.3 1.62 36 20.0 26.6 j
## 30 3 37.5 1.62 36 34.2 40.8 i
## 45 3 48.0 1.62 36 44.7 51.3 gh
## 60 3 72.6 1.62 36 69.3 75.9 f
##
## Results are averaged over the levels of: block
## Confidence level used: 0.95
## P value adjustment: tukey method for comparing a family of 12 estimates
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
HT_Long |>
group_by(Days, Treatment) |>
dplyr::summarize(Mean = mean(HT),
SD = sd(HT),
n = length(HT)) |>
dplyr::mutate(SE = SD/sqrt(n)) |>
ggplot(aes(x = Days, y = Mean, group=Treatment)) +
geom_line(aes(color = Treatment)) +
geom_point() +
geom_errorbar(aes(ymin = Mean - SE,
ymax = Mean + SE,
colour = Treatment),
width = 0.2,
size = 1) +
labs(y = "Mean Height") +
theme_classic() +
theme(legend.position="bottom")