#Intro and Set-Up
# Global chunk options for the knitted report: echo the code, but hide
# package messages and warnings in the rendered output.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(tidyverse)
library(readxl)
library(knitr)
library(dplyr)
library(janitor)
library(readr)
library(supernova)
# Both worksheets come from the same workbook, so its location is stated once.
SleepWorkbookPath <- "~/Desktop/midterm_sleep_exercise.xlsx"
# Participant sheet (ID, Exercise_Group, Sex, Age).
ParticipantInfo <- read_xlsx(
  path = SleepWorkbookPath,
  sheet = "participant_info_midterm"
)
# Sleep sheet (ID, Pre_Sleep, Post_Sleep, Sleep_Efficiency).
SleepData <- read_xlsx(
  path = SleepWorkbookPath,
  sheet = "sleep_data_midterm"
)
# Preview the first rows of the participant sheet to spot inconsistent labels.
head(ParticipantInfo)
## # A tibble: 6 × 4
## ID Exercise_Group Sex Age
## <chr> <chr> <chr> <dbl>
## 1 P001 NONE Male 35
## 2 P002 Nonee Malee 57
## 3 P003 None Female 26
## 4 P004 None Female 29
## 5 P005 None Male 33
## 6 P006 None Female 33
# Preview the first rows of the sleep sheet; the output below shows Pre_Sleep
# is read as text with a prefix before the number (e.g. "zzz-5.8").
head(SleepData)
## # A tibble: 6 × 4
## ID Pre_Sleep Post_Sleep Sleep_Efficiency
## <chr> <chr> <dbl> <dbl>
## 1 P001 zzz-5.8 4.7 81.6
## 2 P002 Sleep-6.6 7.4 75.7
## 3 P003 <NA> 6.2 82.9
## 4 P004 SLEEP-7.2 7.3 83.6
## 5 P005 score-7.4 7.4 83.5
## 6 P006 Sleep-6.6 7.1 88.5
# Lookup tables: each known abbreviation or misspelling (name) maps to its
# canonical label (value). Labels that are not listed are left unchanged.
ExerciseGroupFixes <- c(
  "C" = "Cardio",
  "N" = "None",
  "NONE" = "None",
  "Nonee" = "None",
  "C+W" = "Cardio+Weights",
  "CW" = "Cardio+Weights",
  "WEIGHTS" = "Weights",
  "WEIGHTSSS" = "Weights",
  "WEIGHTZ" = "Weights"
)
SexFixes <- c(
  "F" = "Female",
  "Fem" = "Female",
  "Femalee" = "Female",
  "M" = "Male",
  "Mal" = "Male",
  "MALE" = "Male",
  "Malee" = "Male"
)
# Standardise Exercise_Group and Sex. Indexing the lookup by the raw label
# yields NA when there is no entry, and coalesce() then falls back to the
# original value.
NewWalkerData <- ParticipantInfo %>%
  mutate(
    Exercise_Group = coalesce(
      unname(ExerciseGroupFixes[Exercise_Group]),
      Exercise_Group
    ),
    Sex = coalesce(unname(SexFixes[Sex]), Sex)
  )
# Attach the sleep measurements to the cleaned participant table by ID.
# A left join keeps every participant row, including any without sleep data.
MergedWalkerData <- NewWalkerData %>%
  left_join(SleepData, by = "ID")
# Open the merged table in the data viewer for a visual check.
view(MergedWalkerData)
# Convert a sleep-hours column to numeric.
#
# x: a column of sleep hours, either already numeric or text with a prefix
#    before the number (e.g. "zzz-5.8", "SLEEP-7.2").
# Returns a numeric vector the same length as x. Numeric input is returned
# unchanged; for text input each element becomes the mean of the unsigned
# numbers found in it (NA input stays NA, no number found gives NaN).
extract_sleep_hours <- function(x) {
  # An already-numeric column must not be round-tripped through text: the
  # pattern below has no sign or exponent handling, so that would silently
  # alter negative or scientific-notation values.
  if (is.numeric(x)) {
    return(x)
  }
  # The pattern deliberately omits "-": in values such as "zzz-5.8" the hyphen
  # separates the prefix from the number and is not a minus sign.
  number_matches <- str_extract_all(x, "\\d+\\.?\\d*")
  # vapply() guarantees a numeric vector even for zero-length input, where
  # sapply() would return an empty list.
  vapply(number_matches, function(m) mean(as.numeric(m)), numeric(1))
}

# Make both sleep columns numeric so they can be subtracted later.
MergedWalkerData <- MergedWalkerData %>%
  mutate(
    Pre_Sleep = extract_sleep_hours(Pre_Sleep),
    Post_Sleep = extract_sleep_hours(Post_Sleep)
  )
# Derive the two analysis variables in one pass:
#   Sleep_Difference - change in sleep from pre to post (Post - Pre).
#   AgeGroup2        - "<40" or ">=40"; a missing Age gives a missing group.
MergedWalkerData <- MergedWalkerData %>%
  mutate(
    Sleep_Difference = Post_Sleep - Pre_Sleep,
    AgeGroup2 = if_else(Age < 40, "<40", ">=40")
  )
# Count the participants whose Sleep_Difference could not be computed.
sum(is.na(pull(MergedWalkerData, Sleep_Difference)))
## [1] 14
# Keep only the participants with a usable Sleep_Difference.
MergedWalkerData <- filter(MergedWalkerData, !is.na(Sleep_Difference))
# Overall descriptive statistics (mean, SD, min, max) for both outcomes.
# NOTE(review): the rendered table reports a maximum Sleep_Efficiency of
# 101.5, which is above 100% - confirm whether that value is a data-entry
# error before relying on the efficiency results.
OverallSleepStats <- summarise(
  MergedWalkerData,
  Mean_SleepDiff = mean(Sleep_Difference, na.rm = TRUE),
  SD_SleepDiff = sd(Sleep_Difference, na.rm = TRUE),
  Min_SleepDiff = min(Sleep_Difference, na.rm = TRUE),
  Max_SleepDiff = max(Sleep_Difference, na.rm = TRUE),
  Mean_SleepEff = mean(Sleep_Efficiency, na.rm = TRUE),
  SD_SleepEff = sd(Sleep_Efficiency, na.rm = TRUE),
  Min_SleepEff = min(Sleep_Efficiency, na.rm = TRUE),
  Max_SleepEff = max(Sleep_Efficiency, na.rm = TRUE)
)
# Render the one-row summary as a table rounded to two decimals.
kable(OverallSleepStats, digits = 2, caption = "Sleep Statistics (Overall)")
Mean_SleepDiff | SD_SleepDiff | Min_SleepDiff | Max_SleepDiff | Mean_SleepEff | SD_SleepEff | Min_SleepEff | Max_SleepEff |
---|---|---|---|---|---|---|---|
0.68 | 0.66 | -1.1 | 2.1 | 83.78 | 5.97 | 71.7 | 101.5 |
# Descriptive statistics per exercise group: mean and SD of both outcomes
# plus the number of participants (n) in each group.
GroupSleepStats <- MergedWalkerData %>%
  group_by(Exercise_Group) %>%
  summarise(
    Mean_SleepDiff = mean(Sleep_Difference, na.rm = TRUE),
    SD_SleepDiff = sd(Sleep_Difference, na.rm = TRUE),
    Mean_SleepEff = mean(Sleep_Efficiency, na.rm = TRUE),
    SD_SleepEff = sd(Sleep_Efficiency, na.rm = TRUE),
    n = n()
  )
# Render the per-group summary as a table rounded to two decimals.
kable(
  GroupSleepStats,
  digits = 2,
  caption = "Sleep Statistics by Exercise Group"
)
Exercise_Group | Mean_SleepDiff | SD_SleepDiff | Mean_SleepEff | SD_SleepEff | n |
---|---|---|---|---|---|
Cardio | 1.14 | 0.49 | 85.45 | 5.99 | 21 |
Cardio+Weights | 0.86 | 0.38 | 86.83 | 5.98 | 23 |
None | 0.05 | 0.64 | 81.07 | 5.55 | 21 |
Weights | 0.67 | 0.61 | 81.46 | 4.31 | 21 |
# Box plot of one outcome across exercise groups.
#
# data:       data frame containing Exercise_Group and the outcome column.
# y_col:      name of the outcome column, as a string.
# plot_title: title shown above the plot.
# y_label:    label for the y axis.
# Returns the ggplot object (printed automatically when called at top level).
plot_by_exercise_group <- function(data, y_col, plot_title, y_label) {
  ggplot(
    data,
    aes(x = Exercise_Group, y = .data[[y_col]], fill = Exercise_Group)
  ) +
    geom_boxplot() +
    labs(title = plot_title, x = "Exercise Group", y = y_label) +
    theme_minimal() +
    # The fill colour repeats the x axis, so the legend is hidden.
    theme(legend.position = "none")
}

# Change in sleep (Post - Pre) by exercise group.
plot_by_exercise_group(
  MergedWalkerData,
  y_col = "Sleep_Difference",
  plot_title = "Sleep Difference by Exercise Group",
  y_label = "Sleep Difference (Post - Pre Sleep Hours)"
)
# Sleep efficiency by exercise group.
plot_by_exercise_group(
  MergedWalkerData,
  y_col = "Sleep_Efficiency",
  plot_title = "Sleep Efficiency by Exercise Group",
  y_label = "Sleep Efficiency (%)"
)
# Scatter plot of Sleep_Difference against Sleep_Efficiency, with a fitted
# linear trend line and its confidence band.
MergedWalkerData %>%
  ggplot(aes(x = Sleep_Efficiency, y = Sleep_Difference)) +
  geom_point(color = "dodgerblue4", size = 3, alpha = 0.7) +
  geom_smooth(color = "firebrick", method = "lm", se = TRUE) +
  theme_minimal() +
  labs(
    title = "Relationship Between Sleep Efficiency and Sleep Difference",
    x = "Sleep Efficiency (%)",
    y = "Sleep Difference (Post - Pre Sleep Hours)"
  )
# Compare mean Sleep_Difference between the two Sex groups. The output below
# reports a Welch two-sample t-test (unequal variances).
t.test(Sleep_Difference ~ Sex, data = MergedWalkerData)
##
## Welch Two Sample t-test
##
## data: Sleep_Difference by Sex
## t = 1.5801, df = 77.647, p-value = 0.1182
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
## -0.05865017 0.50972574
## sample estimates:
## mean in group Female mean in group Male
## 0.7795918 0.5540541
# Compare mean Sleep_Difference between the "<40" and ">=40" age groups.
# The output below reports a Welch two-sample t-test (unequal variances).
t.test(Sleep_Difference ~ AgeGroup2, data = MergedWalkerData)
##
## Welch Two Sample t-test
##
## data: Sleep_Difference by AgeGroup2
## t = -1.3746, df = 36.662, p-value = 0.1776
## alternative hypothesis: true difference in means between group <40 and group >=40 is not equal to 0
## 95 percent confidence interval:
## -0.50676303 0.09717936
## sample estimates:
## mean in group <40 mean in group >=40
## 0.6373134 0.8421053
The mean sleep difference for females is .78 hours and the mean sleep difference for males is .55 hours. The p-value from the Welch t-test is .12, which is not significant, though arguably approaching significance. The mean sleep difference for individuals under 40 is .64 hours and the mean sleep difference for individuals aged 40 or over is .84 hours. The p-value is .18, however, so this difference is also not significant.
# One-way ANOVA: does mean Sleep_Difference differ across exercise groups?
SleepDiffAnova <- aov(Sleep_Difference ~ Exercise_Group, data = MergedWalkerData)
# Print the ANOVA table (SS, df, MS, F, PRE, p) for the fitted model.
supernova(SleepDiffAnova)
## Analysis of Variance Table (Type III SS)
## Model: Sleep_Difference ~ Exercise_Group
##
## SS df MS F PRE p
## ----- --------------- | ------ -- ----- ------ ----- -----
## Model (error reduced) | 13.560 3 4.520 15.717 .3651 .0000
## Error (from model) | 23.583 82 0.288
## ----- --------------- | ------ -- ----- ------ ----- -----
## Total (empty model) | 37.144 85 0.437
# Pairwise group comparisons for Sleep_Difference with Tukey-adjusted p-values
# (95% family-wise confidence level, per the output below).
TukeyHSD(SleepDiffAnova)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Sleep_Difference ~ Exercise_Group, data = MergedWalkerData)
##
## $Exercise_Group
## diff lwr upr p adj
## Cardio+Weights-Cardio -0.2772257 -0.7017134 0.14726203 0.3237562
## None-Cardio -1.0904762 -1.5245041 -0.65644825 0.0000000
## Weights-Cardio -0.4714286 -0.9054565 -0.03740063 0.0278779
## None-Cardio+Weights -0.8132505 -1.2377382 -0.38876282 0.0000171
## Weights-Cardio+Weights -0.1942029 -0.6186906 0.23028480 0.6287294
## Weights-None 0.6190476 0.1850197 1.05307556 0.0018927
# One-way ANOVA: does mean Sleep_Efficiency differ across exercise groups?
SleepEffAnova <- aov(Sleep_Efficiency ~ Exercise_Group, data = MergedWalkerData)
# Print the ANOVA table (SS, df, MS, F, PRE, p) for the fitted model.
supernova(SleepEffAnova)
## Analysis of Variance Table (Type III SS)
## Model: Sleep_Efficiency ~ Exercise_Group
##
## SS df MS F PRE p
## ----- --------------- | -------- -- ------- ----- ----- -----
## Model (error reduced) | 540.400 3 180.133 5.925 .1782 .0010
## Error (from model) | 2492.939 82 30.402
## ----- --------------- | -------- -- ------- ----- ----- -----
## Total (empty model) | 3033.339 85 35.686
# Pairwise group comparisons for Sleep_Efficiency with Tukey-adjusted p-values
# (95% family-wise confidence level, per the output below).
TukeyHSD(SleepEffAnova)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Sleep_Efficiency ~ Exercise_Group, data = MergedWalkerData)
##
## $Exercise_Group
## diff lwr upr p adj
## Cardio+Weights-Cardio 1.3871636 -2.977172 5.75149915 0.8383629
## None-Cardio -4.3761905 -8.838613 0.08623232 0.0566544
## Weights-Cardio -3.9904762 -8.452899 0.47194661 0.0962888
## None-Cardio+Weights -5.7633540 -10.127690 -1.39901844 0.0046379
## Weights-Cardio+Weights -5.3776398 -9.741975 -1.01330416 0.0094267
## Weights-None 0.3857143 -4.076709 4.84813708 0.9958617
For the overall sleep difference ANOVA, F(3, 82) = 15.72, p < .001, PRE = .37. This means that the independent variable (exercise) has a significant impact on sleep difference and explains around 37% of the variation in sleep difference. This is a very large effect size. Moreover, all of the pairwise comparisons are significant, other than “Cardio + Weights vs. Cardio” and “Weights vs. Cardio + Weights.” The implication of these data is that cardio is the most impactful, though weights are still far more impactful than nothing.
For the overall sleep efficiency ANOVA, F(3, 82) = 5.93, p = .001, PRE = .18. This means that the independent variable (exercise) had a significant impact on sleep efficiency and explains around 18% of the variation in sleep efficiency. This is a relatively large effect size. That being said, the only two significant group comparisons were “None vs. Cardio + Weights” and “Weights vs. Cardio + Weights.” “None vs. Cardio” was very close to being significant, though (p = .06), and “Weights vs. Cardio” was marginal (p = .10).
If I had to recommend one exercise regime, I would recommend “Cardio and Weights.” It is the only combination that is significantly better than nothing with regard to both sleep efficiency and duration. Moreover, “Cardio and Weights” outperforms “Weights” with regard to sleep efficiency. Taken together, this is the most effective regime. That being said, the evidence suggests that any type of exercise is beneficial with regard to sleep. So, I would recommend (practically speaking) that individuals engage in whatever type of exercise they are most able to incorporate into their routines, even if “Cardio and Weights” together is not a realistic goal for them.
To me, the most challenging aspect of this was cleaning the data. At first, I had trouble making the pre-sleep and post-sleep data numeric without inadvertently erasing the data. I had to find some code online that I had never seen before in order to extract the numbers and resolve this issue. Overall, though, I feel confident about the steps I needed to take and how to troubleshoot the issues I encountered. Even though I found the midterm easier, I felt that it was a very good foundation for tackling the slightly more complex issues in this assignment. Honestly, if I were to “redo this analysis” in the future, I would probably try to learn more about the sleep research beforehand; I don’t necessarily feel that I know enough about what “sleep efficiency” is or the practical effects of losing/gaining a small amount of sleep to make particularly meaningful recommendations based on the data. The fact that the results in this experiment were statistically significant does not mean that they are practically significant—and, in general, I feel that it’s very important to have a more holistic understanding of an experiment, in addition to being able to analyze the objective data accurately.