library(haven)
## Warning: package 'haven' was built under R version 4.4.3
library(ez)
## Warning: package 'ez' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.4.3
library(rstatix)
## Warning: package 'rstatix' was built under R version 4.4.3
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(nortest)
paus_df <- read_sav("files/PAUS_English_Translated.sav")
paus_df_copy <- paus_df
remove_nas <- function(columns, data) {
return(data %>% filter(if_all(all_of(columns), ~ !is.na(.))))
}
transform_numeric_to_categorical <- function(column, data) {
col_data <- data[[column]]
levels <- sort(unique(col_data))
label_vec <- as_factor(sort(unique(col_data)))
return(ordered(col_data, levels = levels, labels = label_vec))
}
find_shapiro_normal_columns <- function(data) {
for (col_name in names(data)) {
if (is.numeric(data[[col_name]])) {
values <- na.omit(data[[col_name]])
if (length(values) >= 3 && length(unique(values)) > 1) {
shapiro_result <- shapiro.test(values)
if (shapiro_result$p.value > 0.05) {
cat("\nColumn:", col_name,
"\nW =", round(shapiro_result$statistic, 4),
"p-value =", round(shapiro_result$p.value, 4), "\n")
}
}
}
}
}
paus_df_copy$chronotyp_tric.1 <- transform_numeric_to_categorical("chronotyp_tric.1", paus_df_copy)
data_long <- paus_df_copy %>%
gather(key = "time", value = "sleep_sufficiency_score",
recovery1.1, recovery1.2, recovery1.3) %>%
convert_as_factor(sysUserID, time, chronotyp_tric.1)
data_long <- remove_nas(c("sysUserID","chronotyp_tric.1","time","sleep_sufficiency_score"),data_long)
# data_long <- data_long[c("sysUserID","chronotyp_tric.1","time","sleep_sufficiency_score")]
View(data_long %>%
filter(sysUserID == "24042") %>%
select(sysUserID, chronotyp_tric.1, time, sleep_sufficiency_score, sysYearWeek.1, sysYearWeek.2, sysYearWeek.3))
data_filtered <- data_long %>%
group_by(sysUserID) %>%
filter(n_distinct(time) >= 3) %>%
ungroup()
View(data_filtered)
anova_test(
data = data_filtered,
dv = sleep_sufficiency_score,
wid = sysUserID,
within = time,
between = chronotyp_tric.1
)
## ANOVA Table (type III tests)
##
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 1 chronotyp_tric.1 2 329 3.123 0.045 * 0.016000
## 2 time 2 658 4.358 0.013 * 0.002000
## 3 chronotyp_tric.1:time 4 658 0.463 0.763 0.000439
##
## $`Mauchly's Test for Sphericity`
## Effect W p p<.05
## 1 time 0.984 0.071
## 2 chronotyp_tric.1:time 0.984 0.071
##
## $`Sphericity Corrections`
## Effect GGe DF[GG] p[GG] p[GG]<.05 HFe DF[HF]
## 1 time 0.984 1.97, 647.64 0.014 * 0.99 1.98, 651.51
## 2 chronotyp_tric.1:time 0.984 3.94, 647.64 0.760 0.99 3.96, 651.51
## p[HF] p[HF]<.05
## 1 0.013 *
## 2 0.761
Interpretation: The ideal value for the Mauchly’s
Test to say that the variables do not violate the assumption of
sphericity is if it is > 0.05. Since the p-value (0.585) > 0.05,
then sphericity is not violated.
Sphericity assumes that:
The variance of the change in self-blame from Time 1 to Time 2
The variance of the change from Time 2 to Time 3
The variance of the change from Time 1 to Time 3
…are all approximately equal (within-subjects (coping11.1–.3)) across the participants.
data_long %>% mutate(group = interaction(chronotyp_tric.1, time)) %>%
levene_test(sleep_sufficiency_score ~ group)
## # A tibble: 1 × 4
## df1 df2 statistic p
## <int> <int> <dbl> <dbl>
## 1 8 1343 0.879 0.534
The ideal value for the Levene’s test of variance equality is if it
is p > 0.05, since it does not reject the null hypothesis that the
variances are equal. Since the p-value (0.2325) from the test is >
0.05, we can conclude that the variances of your dependent variable
(coping11) are equal across the levels of a
between-subjects factor (fia9B.1). This checks the
variances of between-subjects (fia9B.1 groups).
data_long %>%
group_by(chronotyp_tric.1, time) %>%
shapiro_test(sleep_sufficiency_score)
## # A tibble: 9 × 5
## chronotyp_tric.1 time variable statistic p
## <ord> <fct> <chr> <dbl> <dbl>
## 1 Morning person recovery1.1 sleep_suffi… 0.966 2.39e-5
## 2 Morning person recovery1.2 sleep_suffi… 0.946 6.21e-6
## 3 Morning person recovery1.3 sleep_suffi… 0.952 5.56e-5
## 4 Neither a morning or evening person recovery1.1 sleep_suffi… 0.953 6.60e-5
## 5 Neither a morning or evening person recovery1.2 sleep_suffi… 0.941 1.71e-4
## 6 Neither a morning or evening person recovery1.3 sleep_suffi… 0.964 1.19e-2
## 7 Evening person recovery1.1 sleep_suffi… 0.962 2.37e-5
## 8 Evening person recovery1.2 sleep_suffi… 0.949 5.59e-5
## 9 Evening person recovery1.3 sleep_suffi… 0.921 2.83e-6
ggqqplot(data_long, "sleep_sufficiency_score", ggtheme = theme_bw()) +
facet_grid(time ~ chronotyp_tric.1, labeller = "label_both")
gghistogram(data_long, "sleep_sufficiency_score", xlim = c(0, 100), ggtheme = theme_bw()) +
facet_grid(time ~ chronotyp_tric.1, labeller = "label_both")
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
# Gets the mean and standard deviation per group (treatment x time)
group_stats <- data_long %>%
group_by(chronotyp_tric.1, time) %>%
summarise(
mean = mean(sleep_sufficiency_score, na.rm = TRUE),
sd = sd(sleep_sufficiency_score, na.rm = TRUE),
.groups = "drop"
)
group_stats
## # A tibble: 9 × 4
## chronotyp_tric.1 time mean sd
## <ord> <fct> <dbl> <dbl>
## 1 Morning person recovery1.1 59.9 22.0
## 2 Morning person recovery1.2 61.0 21.3
## 3 Morning person recovery1.3 62.9 20.4
## 4 Neither a morning or evening person recovery1.1 57.8 23.9
## 5 Neither a morning or evening person recovery1.2 60.3 24.0
## 6 Neither a morning or evening person recovery1.3 62.5 20.3
## 7 Evening person recovery1.1 54.5 22.0
## 8 Evening person recovery1.2 55.9 22.1
## 9 Evening person recovery1.3 55.3 23.1
# Gets the x and y values for the normal curve
curve_data <- group_stats %>%
rowwise() %>% # executes per row
mutate(x = list(seq(mean - 4 * sd, mean + 4 * sd, length.out = 100))) %>%
unnest(x) %>%
mutate(y = dnorm(x, mean, sd))
curve_data
## # A tibble: 900 × 6
## chronotyp_tric.1 time mean sd x y
## <ord> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Morning person recovery1.1 59.9 22.0 -28.0 0.00000609
## 2 Morning person recovery1.1 59.9 22.0 -26.2 0.00000839
## 3 Morning person recovery1.1 59.9 22.0 -24.4 0.0000115
## 4 Morning person recovery1.1 59.9 22.0 -22.6 0.0000156
## 5 Morning person recovery1.1 59.9 22.0 -20.9 0.0000211
## 6 Morning person recovery1.1 59.9 22.0 -19.1 0.0000283
## 7 Morning person recovery1.1 59.9 22.0 -17.3 0.0000377
## 8 Morning person recovery1.1 59.9 22.0 -15.5 0.0000499
## 9 Morning person recovery1.1 59.9 22.0 -13.8 0.0000656
## 10 Morning person recovery1.1 59.9 22.0 -12.0 0.0000858
## # ℹ 890 more rows
ggplot(data_long, aes(x = sleep_sufficiency_score)) +
geom_histogram(aes(y = ..density..), bins = 25, fill = "skyblue", color = "black") +
geom_line(data = curve_data, aes(x = x, y = y), color = "black", size = 1) +
facet_grid(time ~ chronotyp_tric.1, labeller = label_both) +
theme_bw() +
labs(y = "Density", x = "Score") +
coord_cartesian(xlim = c(0, 100))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
A significant two-way interaction indicates that the impact that one factor (e.g., treatment) has on the outcome variable (e.g., self-esteem score) depends on the level of the other factor (e.g., time) (and vice versa). So, you can decompose a significant two-way interaction into:
Simple main effect: run one-way model of the first variable (factor A) at each level of the second variable (factor B),
Simple pairwise comparisons: if the simple main effect is significant, run multiple pairwise comparisons to determine which groups are different.
For a non-significant two-way interaction, you need to determine whether you have any statistically significant main effects from the ANOVA output.
Reference: Repeated Measures ANOVA in R: The Ultimate Guide - Datanovia
If the interaction is not significant, you need to interpret the main effects for each of the two variables: treatment and time. A significant main effect can be followed up with pairwise comparisons.
group_means <- data_long %>%
group_by(chronotyp_tric.1) %>%
summarise(
mean_score = mean(sleep_sufficiency_score, na.rm = TRUE),
n = n()
)
group_means
## # A tibble: 3 × 3
## chronotyp_tric.1 mean_score n
## <ord> <dbl> <int>
## 1 Morning person 61.1 544
## 2 Neither a morning or evening person 59.8 345
## 3 Evening person 55.1 463
data_long %>%
pairwise_t_test(
sleep_sufficiency_score ~ chronotyp_tric.1, paired = FALSE,
p.adjust.method = "bonferroni"
)
## # A tibble: 3 × 9
## .y. group1 group2 n1 n2 p p.signif p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
## 1 sleep_suffici… Morni… Neith… 544 345 4.18e-1 ns 1 e+0 ns
## 2 sleep_suffici… Morni… Eveni… 544 463 2.28e-5 **** 6.85e-5 ****
## 3 sleep_suffici… Neith… Eveni… 345 463 2.8 e-3 ** 8.4 e-3 **
There is a highly significant difference (p-value < 0.01) in score between the perceived sleep sufficiency of for the morning person and evening chronotypes. This indicates a highly significant difference in perceived sleep sufficiency between morning and evening chronotypes. Individuals identifying as Morning persons reported significantly higher sleep sufficiency than Evening persons. A statistically significant difference also exists between “Neither” and “Evening” chronotypes. This suggests that Evening types perceive their sleep as less sufficient compared to those who do not identify strongly with either morning or evening preferences.
group_means <- data_long %>%
group_by(time) %>%
summarise(
mean_score = mean(sleep_sufficiency_score, na.rm = TRUE),
n = n()
)
group_means
## # A tibble: 3 × 3
## time mean_score n
## <fct> <dbl> <int>
## 1 recovery1.1 57.5 586
## 2 recovery1.2 59.1 407
## 3 recovery1.3 60.3 359
data_filtered %>%
pairwise_t_test(
sleep_sufficiency_score ~ time, paired = TRUE,
p.adjust.method = "bonferroni"
)
## # A tibble: 3 × 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 sleep_suff… recov… recov… 332 332 -1.65 331 0.1 0.299 ns
## 2 sleep_suff… recov… recov… 332 332 -3.13 331 0.002 0.006 **
## 3 sleep_suff… recov… recov… 332 332 -1.50 331 0.134 0.402 ns
Based on the pairwise comparisons of perceived sleep sufficiency across the three recovery assessment groups (recovery1.1, recovery1.2, and recovery1.3), only the comparison between recovery1.1 and recovery1.3 showed a statistically significant difference. This suggests that there is a statistically significant difference in perceived sleep sufficiency between the first and third recovery assessments.
The first assessment was taken during the Spring from weeks (18 to 26) (May to June) in 2014.
The second assessment was taken during the Fall from weeks (38 to 51) (September to December) in 2014.
The third assessment was taken during the Winter from weeks (2 to 8) (January to February) in 2015.
This means: There was a statistically significant change in the sleep sufficiency score from the first recovery phase (recovery1.1) to the third recovery phase (recovery1.3).
The test statistic is negative: -3.125111. This indicates that the mean sleep sufficiency score was lower in recovery1.1 than in recovery1.3.
Calendar source for the seasons check: 2014 Seasons