Two-way ANOVA with repeated measures

Importing Libraries

library(haven)

## Warning: package 'haven' was built under R version 4.4.3

library(ez)

## Warning: package 'ez' was built under R version 4.4.3

library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.4.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.4.3

library(rstatix)

## Warning: package 'rstatix' was built under R version 4.4.3

## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(car)

## Warning: package 'car' was built under R version 4.4.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.4.3

## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(nortest)

Reading source files

paus_df <- read_sav("files/PAUS_English_Translated.sav")

Creating a copy of the mother file

paus_df_copy <- paus_df

Functions

remove_nas <- function(columns, data) {
  return(data %>% filter(if_all(all_of(columns), ~ !is.na(.))))
}
transform_numeric_to_categorical <- function(column, data) {       
  col_data <- data[[column]]
  levels <- sort(unique(col_data))
  label_vec <- as_factor(sort(unique(col_data)))
  return(ordered(col_data, levels = levels, labels = label_vec))
}
find_shapiro_normal_columns <- function(data) {
  for (col_name in names(data)) {
  if (is.numeric(data[[col_name]])) {
    values <- na.omit(data[[col_name]])
    
    if (length(values) >= 3 && length(unique(values)) > 1) {
      shapiro_result <- shapiro.test(values)
      
      if (shapiro_result$p.value > 0.05) {
        cat("\nColumn:", col_name,
            "\nW =", round(shapiro_result$statistic, 4),
            "p-value =", round(shapiro_result$p.value, 4), "\n")
      }
    }
  }
}
}

paus_df_copy$chronotyp_tric.1 <- transform_numeric_to_categorical("chronotyp_tric.1", paus_df_copy)

data_long <- paus_df_copy %>%
  gather(key = "time", value = "sleep_sufficiency_score", 
        recovery1.1, recovery1.2, recovery1.3) %>%
  convert_as_factor(sysUserID, time, chronotyp_tric.1)
data_long <- remove_nas(c("sysUserID","chronotyp_tric.1","time","sleep_sufficiency_score"),data_long)
# data_long <- data_long[c("sysUserID","chronotyp_tric.1","time","sleep_sufficiency_score")]
View(data_long %>%
  filter(sysUserID == "24042") %>%
  select(sysUserID, chronotyp_tric.1, time, sleep_sufficiency_score, sysYearWeek.1, sysYearWeek.2, sysYearWeek.3))

Data Assumptions Testing

Sphericity

data_filtered <- data_long %>%
  group_by(sysUserID) %>%
  filter(n_distinct(time) >= 3) %>%
  ungroup()
View(data_filtered)

anova_test(
  data = data_filtered,
  dv = sleep_sufficiency_score,
  wid = sysUserID,
  within = time,
  between = chronotyp_tric.1
)

## ANOVA Table (type III tests)
## 
## $ANOVA
##                  Effect DFn DFd     F     p p<.05      ges
## 1      chronotyp_tric.1   2 329 3.123 0.045     * 0.016000
## 2                  time   2 658 4.358 0.013     * 0.002000
## 3 chronotyp_tric.1:time   4 658 0.463 0.763       0.000439
## 
## $`Mauchly's Test for Sphericity`
##                  Effect     W     p p<.05
## 1                  time 0.984 0.071      
## 2 chronotyp_tric.1:time 0.984 0.071      
## 
## $`Sphericity Corrections`
##                  Effect   GGe       DF[GG] p[GG] p[GG]<.05  HFe       DF[HF]
## 1                  time 0.984 1.97, 647.64 0.014         * 0.99 1.98, 651.51
## 2 chronotyp_tric.1:time 0.984 3.94, 647.64 0.760           0.99 3.96, 651.51
##   p[HF] p[HF]<.05
## 1 0.013         *
## 2 0.761

Interpretation: The ideal value for the Mauchly’s Test to say that the variables do not violate the assumption of sphericity is if it is > 0.05. Since the p-value (0.585) > 0.05, then sphericity is not violated.
Sphericity assumes that:

The variance of the change in self-blame from Time 1 to Time 2
The variance of the change from Time 2 to Time 3
The variance of the change from Time 1 to Time 3

…are all approximately equal (within-subjects (coping11.1–.3)) across the participants.

Homogeneity

data_long %>%   mutate(group = interaction(chronotyp_tric.1, time)) %>%   
  levene_test(sleep_sufficiency_score ~ group)

## # A tibble: 1 × 4
##     df1   df2 statistic     p
##   <int> <int>     <dbl> <dbl>
## 1     8  1343     0.879 0.534

The ideal value for the Levene’s test of variance equality is if it is p > 0.05, since it does not reject the null hypothesis that the variances are equal. Since the p-value (0.2325) from the test is > 0.05, we can conclude that the variances of your dependent variable (coping11) are equal across the levels of a between-subjects factor (fia9B.1). This checks the variances of between-subjects (fia9B.1 groups).

Test of Normality

data_long %>%
  group_by(chronotyp_tric.1, time) %>%
  shapiro_test(sleep_sufficiency_score)

## # A tibble: 9 × 5
##   chronotyp_tric.1                    time        variable     statistic       p
##   <ord>                               <fct>       <chr>            <dbl>   <dbl>
## 1 Morning person                      recovery1.1 sleep_suffi…     0.966 2.39e-5
## 2 Morning person                      recovery1.2 sleep_suffi…     0.946 6.21e-6
## 3 Morning person                      recovery1.3 sleep_suffi…     0.952 5.56e-5
## 4 Neither a morning or evening person recovery1.1 sleep_suffi…     0.953 6.60e-5
## 5 Neither a morning or evening person recovery1.2 sleep_suffi…     0.941 1.71e-4
## 6 Neither a morning or evening person recovery1.3 sleep_suffi…     0.964 1.19e-2
## 7 Evening person                      recovery1.1 sleep_suffi…     0.962 2.37e-5
## 8 Evening person                      recovery1.2 sleep_suffi…     0.949 5.59e-5
## 9 Evening person                      recovery1.3 sleep_suffi…     0.921 2.83e-6

ggqqplot(data_long, "sleep_sufficiency_score", ggtheme = theme_bw()) +
  facet_grid(time ~ chronotyp_tric.1, labeller = "label_both")

gghistogram(data_long, "sleep_sufficiency_score", xlim = c(0, 100), ggtheme = theme_bw()) +
  facet_grid(time ~ chronotyp_tric.1, labeller = "label_both")

## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.

# Gets the mean and standard deviation per group (treatment x time)
group_stats <- data_long %>%
  group_by(chronotyp_tric.1, time) %>%
  summarise(
    mean = mean(sleep_sufficiency_score, na.rm = TRUE),
    sd = sd(sleep_sufficiency_score, na.rm = TRUE),
    .groups = "drop"
  )
group_stats

## # A tibble: 9 × 4
##   chronotyp_tric.1                    time         mean    sd
##   <ord>                               <fct>       <dbl> <dbl>
## 1 Morning person                      recovery1.1  59.9  22.0
## 2 Morning person                      recovery1.2  61.0  21.3
## 3 Morning person                      recovery1.3  62.9  20.4
## 4 Neither a morning or evening person recovery1.1  57.8  23.9
## 5 Neither a morning or evening person recovery1.2  60.3  24.0
## 6 Neither a morning or evening person recovery1.3  62.5  20.3
## 7 Evening person                      recovery1.1  54.5  22.0
## 8 Evening person                      recovery1.2  55.9  22.1
## 9 Evening person                      recovery1.3  55.3  23.1

# Gets the x and y values for the normal curve
curve_data <- group_stats %>%
  rowwise() %>% # executes per row
  mutate(x = list(seq(mean - 4 * sd, mean + 4 * sd, length.out = 100))) %>%
  unnest(x) %>%
  mutate(y = dnorm(x, mean, sd))
curve_data

## # A tibble: 900 × 6
##    chronotyp_tric.1 time         mean    sd     x          y
##    <ord>            <fct>       <dbl> <dbl> <dbl>      <dbl>
##  1 Morning person   recovery1.1  59.9  22.0 -28.0 0.00000609
##  2 Morning person   recovery1.1  59.9  22.0 -26.2 0.00000839
##  3 Morning person   recovery1.1  59.9  22.0 -24.4 0.0000115 
##  4 Morning person   recovery1.1  59.9  22.0 -22.6 0.0000156 
##  5 Morning person   recovery1.1  59.9  22.0 -20.9 0.0000211 
##  6 Morning person   recovery1.1  59.9  22.0 -19.1 0.0000283 
##  7 Morning person   recovery1.1  59.9  22.0 -17.3 0.0000377 
##  8 Morning person   recovery1.1  59.9  22.0 -15.5 0.0000499 
##  9 Morning person   recovery1.1  59.9  22.0 -13.8 0.0000656 
## 10 Morning person   recovery1.1  59.9  22.0 -12.0 0.0000858 
## # ℹ 890 more rows

ggplot(data_long, aes(x = sleep_sufficiency_score)) +
  geom_histogram(aes(y = ..density..), bins = 25, fill = "skyblue", color = "black") +
  geom_line(data = curve_data, aes(x = x, y = y), color = "black", size = 1) +
  facet_grid(time ~ chronotyp_tric.1, labeller = label_both) +
  theme_bw() +
  labs(y = "Density", x = "Score") +
  coord_cartesian(xlim = c(0, 100))

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Post-hoc tests

A significant two-way interaction indicates that the impact that one factor (e.g., treatment) has on the outcome variable (e.g., self-esteem score) depends on the level of the other factor (e.g., time) (and vice versa). So, you can decompose a significant two-way interaction into:

Simple main effect: run one-way model of the first variable (factor A) at each level of the second variable (factor B),
Simple pairwise comparisons: if the simple main effect is significant, run multiple pairwise comparisons to determine which groups are different.

For a non-significant two-way interaction, you need to determine whether you have any statistically significant main effects from the ANOVA output.

Reference: Repeated Measures ANOVA in R: The Ultimate Guide - Datanovia

Procedure for non-significant two-way interaction

If the interaction is not significant, you need to interpret the main effects for each of the two variables: treatment and time. A significant main effect can be followed up with pairwise comparisons.

group_means <- data_long %>%
  group_by(chronotyp_tric.1) %>%
  summarise(
    mean_score = mean(sleep_sufficiency_score, na.rm = TRUE),
    n = n()
  )
group_means

## # A tibble: 3 × 3
##   chronotyp_tric.1                    mean_score     n
##   <ord>                                    <dbl> <int>
## 1 Morning person                            61.1   544
## 2 Neither a morning or evening person       59.8   345
## 3 Evening person                            55.1   463

data_long %>%
  pairwise_t_test(
    sleep_sufficiency_score ~ chronotyp_tric.1, paired = FALSE,
    p.adjust.method = "bonferroni"
  )

## # A tibble: 3 × 9
##   .y.            group1 group2    n1    n2       p p.signif   p.adj p.adj.signif
## * <chr>          <chr>  <chr>  <int> <int>   <dbl> <chr>      <dbl> <chr>       
## 1 sleep_suffici… Morni… Neith…   544   345 4.18e-1 ns       1   e+0 ns          
## 2 sleep_suffici… Morni… Eveni…   544   463 2.28e-5 ****     6.85e-5 ****        
## 3 sleep_suffici… Neith… Eveni…   345   463 2.8 e-3 **       8.4 e-3 **

There is a highly significant difference (p-value < 0.01) in score between the perceived sleep sufficiency of for the morning person and evening chronotypes. This indicates a highly significant difference in perceived sleep sufficiency between morning and evening chronotypes. Individuals identifying as Morning persons reported significantly higher sleep sufficiency than Evening persons. A statistically significant difference also exists between “Neither” and “Evening” chronotypes. This suggests that Evening types perceive their sleep as less sufficient compared to those who do not identify strongly with either morning or evening preferences.

group_means <- data_long %>%
  group_by(time) %>%
  summarise(
    mean_score = mean(sleep_sufficiency_score, na.rm = TRUE),
    n = n()
  )
group_means

## # A tibble: 3 × 3
##   time        mean_score     n
##   <fct>            <dbl> <int>
## 1 recovery1.1       57.5   586
## 2 recovery1.2       59.1   407
## 3 recovery1.3       60.3   359

data_filtered %>%
  pairwise_t_test(
    sleep_sufficiency_score ~ time, paired = TRUE,
    p.adjust.method = "bonferroni"
  )

## # A tibble: 3 × 10
##   .y.         group1 group2    n1    n2 statistic    df     p p.adj p.adj.signif
## * <chr>       <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl> <dbl> <chr>       
## 1 sleep_suff… recov… recov…   332   332     -1.65   331 0.1   0.299 ns          
## 2 sleep_suff… recov… recov…   332   332     -3.13   331 0.002 0.006 **          
## 3 sleep_suff… recov… recov…   332   332     -1.50   331 0.134 0.402 ns

Based on the pairwise comparisons of perceived sleep sufficiency across the three recovery assessment groups (recovery1.1, recovery1.2, and recovery1.3), only the comparison between recovery1.1 and recovery1.3 showed a statistically significant difference. This suggests that there is a statistically significant difference in perceived sleep sufficiency between the first and third recovery assessments.

The first assessment was taken during the Spring from weeks (18 to 26) (May to June) in 2014.
The second assessment was taken during the Fall from weeks (38 to 51) (September to December) in 2014.
The third assessment was taken during the Winter from weeks (2 to 8) (January to February) in 2015.

This means: There was a statistically significant change in the sleep sufficiency score from the first recovery phase (recovery1.1) to the third recovery phase (recovery1.3).

The test statistic is negative: -3.125111. This indicates that the mean sleep sufficiency score was lower in recovery1.1 than in recovery1.3.

Calendar source for the seasons check: 2014 Seasons