ANCOVA

ANCOVA adjusts the dependent variable (DV) by removing (controlling for) the linear effect of the covariate — before testing for group differences.

Use this when each row is independent (no repeated measures)

Importing Libraries

library(haven)

## Warning: package 'haven' was built under R version 4.4.3

library(ez)

## Warning: package 'ez' was built under R version 4.4.3

library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.4.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.4.3

library(rstatix)

## Warning: package 'rstatix' was built under R version 4.4.3

## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(car)

## Warning: package 'car' was built under R version 4.4.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.4.3

## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(nortest)

Reading source files

paus_df <- read_sav("files/PAUS_English_Translated.sav")

Functions

remove_nas <- function(columns, data) {   
  return(data %>% filter(if_all(all_of(columns), ~ !is.na(.)))) 
} 
transform_numeric_to_categorical <- function(column, data) {          
  col_data <-data[[column]]   
  levels <- sort(unique(col_data))   
  label_vec <- as_factor(sort(unique(col_data)))   
  return(ordered(col_data, levels = levels, labels = label_vec)) 
} 
find_shapiro_normal_columns <- function(data) {   
  for (col_name in names(data)) {   
    if (is.numeric(data[[col_name]])) {     
      values <- na.omit(data[[col_name]])          
      if (length(values) >= 3 && length(unique(values)) > 1) {       
        shapiro_result <- shapiro.test(values)              
        if (shapiro_result$p.value > 0.05) {         
          cat("\nColumn:", col_name,             
              "\nW =", round(shapiro_result$statistic, 4),             
              "p-value =", round(shapiro_result$p.value, 4), "\n")       
        }     
      }   
    } 
  } 
}

paus_df_copy <- paus_df
paus_df_copy$chronotyp_tric.1 <- transform_numeric_to_categorical("chronotyp_tric.1", paus_df_copy)
paus_df_copy$recovery1.2 <- as.numeric(paus_df_copy$recovery1.2)
paus_df_copy$recovery1.3 <- as.numeric(paus_df_copy$recovery1.3)
paus_df_copy <- remove_nas(c("sysUserID", "chronotyp_tric.1", "recovery1.2", "recovery1.3"),paus_df_copy)
paus_df_copy <- paus_df_copy[c('sysUserID', 'chronotyp_tric.1', 'recovery1.2', 'recovery1.3')]

Data Assumptions Testing

Linearity

This assumption checks that there is a linear relationship between the covariate and the dependent variable.

ggscatter(
  paus_df_copy, x = "recovery1.2", y = "recovery1.3",
  color = "chronotyp_tric.1", add = "reg.line"
  )+
  stat_regline_equation(
    aes(label =  paste(..eq.label.., ..rr.label.., sep = "~~~~"), color = "chronotyp_tric.1")
    )

There is a linear relationship between the sleep sufficiency (recovery1) in the first and third assessments.

Homogeneity of regression slopes

This assumption checks that there is no significant interaction between the covariate and the grouping variable.

paus_df_copy %>% anova_test(recovery1.3 ~ chronotyp_tric.1*recovery1.2)

## ANOVA Table (type II tests)
## 
##                         Effect DFn DFd       F        p p<.05   ges
## 1             chronotyp_tric.1   2 326   1.835 1.61e-01       0.011
## 2                  recovery1.2   1 326 538.633 4.99e-71     * 0.623
## 3 chronotyp_tric.1:recovery1.2   2 326   1.604 2.03e-01       0.010

Since there is no significant interaction between the grouping variable (chronotype) and the covariate (recovery1 in 2nd assessment).

Normality of residuals

model <- lm(recovery1.3 ~ recovery1.2 + chronotyp_tric.1, data = paus_df_copy)
# Inspect the model diagnostic metrics
model.metrics <- augment(model) 
head(model.metrics, 3)

## # A tibble: 3 × 9
##   recovery1.3 recovery1.2 chronotyp_tric.1  .fitted .resid   .hat .sigma .cooksd
##         <dbl>       <dbl> <ord>               <dbl>  <dbl>  <dbl>  <dbl>   <dbl>
## 1          72          90 Morning person       84.2 -12.2  0.0121   13.0 2.74e-3
## 2          73          73 Neither a mornin…    71.8   1.19 0.0125   13.0 2.67e-5
## 3          22          12 Morning person       26.1  -4.14 0.0221   13.0 5.85e-4
## # ℹ 1 more variable: .std.resid <dbl>

shapiro_test(model.metrics$.resid)

## # A tibble: 1 × 3
##   variable             statistic p.value
##   <chr>                    <dbl>   <dbl>
## 1 model.metrics$.resid     0.989  0.0156

The ideal result should be that the Shapiro Wilk test p-value will not be significant.

Homogeneity of variances

model.metrics %>% levene_test(.resid ~ chronotyp_tric.1)

## # A tibble: 1 × 4
##     df1   df2 statistic      p
##   <int> <int>     <dbl>  <dbl>
## 1     2   329      4.33 0.0139

The ideal result should be that the Levene’s test p-value will not be significant.

Outliers

model.metrics %>% 
  filter(abs(.std.resid) > 3) %>%
  as.data.frame()

##   recovery1.3 recovery1.2 chronotyp_tric.1  .fitted    .resid        .hat
## 1           2          43   Evening person 46.37427 -44.37427 0.010578893
## 2          89          42   Morning person 48.48896  40.51104 0.009468907
##     .sigma    .cooksd .std.resid
## 1 12.80434 0.03138070  -3.426350
## 2 12.84408 0.02335784   3.126299

There were outliers detected.

ANCOVA Implementation

# In this formula, the covariate (recovery1.2) is indicated first instead of the grouping variable (chronotyp_tric.1)
res.aov <- paus_df_copy %>% anova_test(recovery1.3 ~ recovery1.2 + chronotyp_tric.1)
get_anova_table(res.aov)

## ANOVA Table (type II tests)
## 
##             Effect DFn DFd       F        p p<.05   ges
## 1      recovery1.2   1 328 536.656 5.10e-71     * 0.621
## 2 chronotyp_tric.1   2 328   1.829 1.62e-01       0.011

The ideal result should show that the p-values of the covariate and grouping variable show a significance.

Post-Hoc Test

Pairwise comparisons can be performed to identify which groups are different. The Bonferroni multiple testing correction is applied.

paus_df_copy %>% 
  emmeans_test(
    recovery1.3 ~ chronotyp_tric.1, covariate = recovery1.2,
    p.adjust.method = "bonferroni"
  )

## # A tibble: 3 × 9
##   term             .y.   group1 group2    df statistic      p p.adj p.adj.signif
## * <chr>            <chr> <chr>  <chr>  <dbl>     <dbl>  <dbl> <dbl> <chr>       
## 1 recovery1.2*chr… reco… Morni… Neith…   328    -0.129 0.898  1     ns          
## 2 recovery1.2*chr… reco… Morni… Eveni…   328     1.69  0.0915 0.275 ns          
## 3 recovery1.2*chr… reco… Neith… Eveni…   328     1.63  0.104  0.311 ns