#Intro and Set-Up

# Global knitr chunk options: echo the code in the rendered report, but
# suppress messages and warnings emitted while the chunks run.
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

library(tidyverse)
library(readxl)
library(knitr)
library(dplyr)
library(janitor)
library(readr)
library(supernova)

# Load the two worksheets of the midterm workbook: one with participant
# information and one with the sleep measurements.
ParticipantInfo <- read_xlsx(
  path = "~/Desktop/midterm_sleep_exercise.xlsx",
  sheet = "participant_info_midterm"
)
SleepData <- read_xlsx(
  path = "~/Desktop/midterm_sleep_exercise.xlsx",
  sheet = "sleep_data_midterm"
)

# Preview the first rows of the participant sheet.
head(ParticipantInfo)

## # A tibble: 6 × 4
##   ID    Exercise_Group Sex      Age
##   <chr> <chr>          <chr>  <dbl>
## 1 P001  NONE           Male      35
## 2 P002  Nonee          Malee     57
## 3 P003  None           Female    26
## 4 P004  None           Female    29
## 5 P005  None           Male      33
## 6 P006  None           Female    33

# Preview the first rows of the sleep sheet. Note that Pre_Sleep is read as
# text because its values carry a string prefix (e.g. "zzz-5.8").
head(SleepData)

## # A tibble: 6 × 4
##   ID    Pre_Sleep Post_Sleep Sleep_Efficiency
##   <chr> <chr>          <dbl>            <dbl>
## 1 P001  zzz-5.8          4.7             81.6
## 2 P002  Sleep-6.6        7.4             75.7
## 3 P003  <NA>             6.2             82.9
## 4 P004  SLEEP-7.2        7.3             83.6
## 5 P005  score-7.4        7.4             83.5
## 6 P006  Sleep-6.6        7.1             88.5

Merge and Base Cleaning

# Collapse the inconsistent spellings of Exercise_Group and Sex onto one
# canonical label each. Values that are not listed in a lookup (including NA)
# pass through unchanged.
NewWalkerData <- local({
  exercise_labels <- c(
    "C" = "Cardio",
    "N" = "None", "NONE" = "None", "Nonee" = "None",
    "C+W" = "Cardio+Weights", "CW" = "Cardio+Weights",
    "WEIGHTS" = "Weights", "WEIGHTSSS" = "Weights", "WEIGHTZ" = "Weights"
  )
  sex_labels <- c(
    "F" = "Female", "Fem" = "Female", "Femalee" = "Female",
    "M" = "Male", "Mal" = "Male", "MALE" = "Male", "Malee" = "Male"
  )

  ParticipantInfo %>%
    mutate(
      # Look each value up by name; a miss yields NA, so fall back to the
      # original value.
      Exercise_Group = coalesce(
        unname(exercise_labels[Exercise_Group]),
        Exercise_Group
      ),
      Sex = coalesce(unname(sex_labels[Sex]), Sex)
    )
})

# Attach the sleep measurements to the cleaned participant table, matching on
# ID and keeping every participant row (left join).
MergedWalkerData <- NewWalkerData %>%
  left_join(SleepData, by = "ID")

# Open the merged table in the data viewer for a visual check.
view(MergedWalkerData)

Create Derived Variables

# Convert a sleep-hours column to numeric.
#
# x: a sleep-hours column, either already numeric or text such as
#    "Sleep-6.6" / "zzz-5.8" where the number follows a string prefix.
# Returns a double vector the same length as x.
#
# - Numeric input is returned untouched. Round-tripping it through text and
#   the unsigned-digit regex below would strip a minus sign and can lose
#   precision.
# - For text input every run of digits (with optional decimal part) is
#   extracted and averaged. The "-" in the prefixes is a separator, not a
#   sign, which is why the pattern deliberately does not capture it.
# - A missing entry stays NA; an entry without any digits yields NaN, which
#   is.na() also reports as missing.
parse_sleep_hours <- function(x) {
  if (is.numeric(x)) {
    return(x)
  }
  number_matches <- str_extract_all(x, "\\d+\\.?\\d*")
  # vapply() guarantees a double vector even for zero-length input, where
  # sapply() would return an empty list.
  vapply(
    number_matches,
    function(found) mean(as.numeric(found)),
    numeric(1)
  )
}

MergedWalkerData <- MergedWalkerData %>%
  mutate(
    Pre_Sleep  = parse_sleep_hours(Pre_Sleep),
    Post_Sleep = parse_sleep_hours(Post_Sleep)
  )

# Derive the outcome (post minus pre sleep) and a two-level age split at 40.
# A missing Age stays missing in AgeGroup2.
MergedWalkerData <- MergedWalkerData %>%
  mutate(
    Sleep_Difference = Post_Sleep - Pre_Sleep,
    AgeGroup2 = if_else(Age < 40, "<40", ">=40")
  )

# Count the rows whose Sleep_Difference is missing, i.e. where Pre_Sleep or
# Post_Sleep could not be used in the subtraction.
sum(is.na(MergedWalkerData$Sleep_Difference))

## [1] 14

# Keep only the participants with a computable Sleep_Difference.
MergedWalkerData <- drop_na(MergedWalkerData, Sleep_Difference)

Descriptive Statistics

# Overall mean, SD, minimum and maximum of the sleep difference and the sleep
# efficiency, rendered as a table rounded to two decimals. The columns are
# given short names first so that across() produces Mean_SleepDiff, ...,
# Max_SleepEff.
MergedWalkerData %>%
  select(SleepDiff = Sleep_Difference, SleepEff = Sleep_Efficiency) %>%
  summarise(
    across(
      everything(),
      list(
        Mean = function(x) mean(x, na.rm = TRUE),
        SD   = function(x) sd(x, na.rm = TRUE),
        Min  = function(x) min(x, na.rm = TRUE),
        Max  = function(x) max(x, na.rm = TRUE)
      ),
      .names = "{.fn}_{.col}"
    )
  ) %>%
  kable(digits = 2, caption = "Sleep Statistics (Overall)")

Sleep Statistics (Overall)
Mean_SleepDiff	SD_SleepDiff	Min_SleepDiff	Max_SleepDiff	Mean_SleepEff	SD_SleepEff	Min_SleepEff	Max_SleepEff
0.68	0.66	-1.1	2.1	83.78	5.97	71.7	101.5

# Mean and SD of the sleep difference and the sleep efficiency per exercise
# group, plus the group size, rendered as a table rounded to two decimals.
MergedWalkerData %>%
  select(
    Exercise_Group,
    SleepDiff = Sleep_Difference,
    SleepEff = Sleep_Efficiency
  ) %>%
  group_by(Exercise_Group) %>%
  summarise(
    across(
      c(SleepDiff, SleepEff),
      list(
        Mean = function(x) mean(x, na.rm = TRUE),
        SD   = function(x) sd(x, na.rm = TRUE)
      ),
      .names = "{.fn}_{.col}"
    ),
    n = n()
  ) %>%
  kable(digits = 2, caption = "Sleep Statistics by Exercise Group")

Sleep Statistics by Exercise Group
Exercise_Group	Mean_SleepDiff	SD_SleepDiff	Mean_SleepEff	SD_SleepEff	n
Cardio	1.14	0.49	85.45	5.99	21
Cardio+Weights	0.86	0.38	86.83	5.98	23
None	0.05	0.64	81.07	5.55	21
Weights	0.67	0.61	81.46	4.31	21

Visualizations

# Box plots of the sleep difference for each exercise group. The fill legend
# is hidden because it only repeats the x-axis categories.
MergedWalkerData %>%
  ggplot(aes(x = Exercise_Group, y = Sleep_Difference, fill = Exercise_Group)) +
  geom_boxplot() +
  ggtitle("Sleep Difference by Exercise Group") +
  xlab("Exercise Group") +
  ylab("Sleep Difference (Post - Pre Sleep Hours)") +
  theme_minimal() +
  theme(legend.position = "none")

# Box plots of the sleep efficiency for each exercise group. The fill legend
# is hidden because it only repeats the x-axis categories.
MergedWalkerData %>%
  ggplot(aes(x = Exercise_Group, y = Sleep_Efficiency, fill = Exercise_Group)) +
  geom_boxplot() +
  ggtitle("Sleep Efficiency by Exercise Group") +
  xlab("Exercise Group") +
  ylab("Sleep Efficiency (%)") +
  theme_minimal() +
  theme(legend.position = "none")

# Scatter plot of the sleep difference against the sleep efficiency, with a
# fitted linear trend line and its confidence band.
MergedWalkerData %>%
  ggplot(aes(x = Sleep_Efficiency, y = Sleep_Difference)) +
  geom_point(alpha = 0.7, size = 3, color = "dodgerblue4") +
  geom_smooth(method = "lm", se = TRUE, color = "firebrick") +
  ggtitle("Relationship Between Sleep Efficiency and Sleep Difference") +
  xlab("Sleep Efficiency (%)") +
  ylab("Sleep Difference (Post - Pre Sleep Hours)") +
  theme_minimal()

T-Tests

# Two-sample t-test comparing the mean sleep difference between the sexes.
# t.test() defaults to the Welch (unequal-variance) version.
t.test(Sleep_Difference ~ Sex, data = MergedWalkerData)

## 
##  Welch Two Sample t-test
## 
## data:  Sleep_Difference by Sex
## t = 1.5801, df = 77.647, p-value = 0.1182
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
##  -0.05865017  0.50972574
## sample estimates:
## mean in group Female   mean in group Male 
##            0.7795918            0.5540541

# Two-sample t-test comparing the mean sleep difference between the two age
# groups (<40 vs. >=40). t.test() defaults to the Welch version.
t.test(Sleep_Difference ~ AgeGroup2, data = MergedWalkerData)

## 
##  Welch Two Sample t-test
## 
## data:  Sleep_Difference by AgeGroup2
## t = -1.3746, df = 36.662, p-value = 0.1776
## alternative hypothesis: true difference in means between group <40 and group >=40 is not equal to 0
## 95 percent confidence interval:
##  -0.50676303  0.09717936
## sample estimates:
##  mean in group <40 mean in group >=40 
##          0.6373134          0.8421053

The mean sleep difference is 0.78 for females and 0.55 for males. The p-value is .12, which is not significant at the .05 level, though it is arguably approaching significance. The mean sleep difference is 0.64 for individuals under 40 and 0.84 for individuals aged 40 or older. The p-value is .18, so this difference is also not significant.

ANOVAS

# One-way ANOVA of the sleep difference across the exercise groups.
# supernova() prints the ANOVA table, including F, PRE and the p-value.
SleepDiffAnova <- aov(Sleep_Difference ~ Exercise_Group, data = MergedWalkerData)
supernova(SleepDiffAnova)

##  Analysis of Variance Table (Type III SS)
##  Model: Sleep_Difference ~ Exercise_Group
## 
##                              SS df    MS      F   PRE     p
##  ----- --------------- | ------ -- ----- ------ ----- -----
##  Model (error reduced) | 13.560  3 4.520 15.717 .3651 .0000
##  Error (from model)    | 23.583 82 0.288                   
##  ----- --------------- | ------ -- ----- ------ ----- -----
##  Total (empty model)   | 37.144 85 0.437

# Tukey HSD post-hoc test: all pairwise exercise-group comparisons for the
# sleep-difference model, with family-wise adjusted p-values.
TukeyHSD(SleepDiffAnova)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Sleep_Difference ~ Exercise_Group, data = MergedWalkerData)
## 
## $Exercise_Group
##                              diff        lwr         upr     p adj
## Cardio+Weights-Cardio  -0.2772257 -0.7017134  0.14726203 0.3237562
## None-Cardio            -1.0904762 -1.5245041 -0.65644825 0.0000000
## Weights-Cardio         -0.4714286 -0.9054565 -0.03740063 0.0278779
## None-Cardio+Weights    -0.8132505 -1.2377382 -0.38876282 0.0000171
## Weights-Cardio+Weights -0.1942029 -0.6186906  0.23028480 0.6287294
## Weights-None            0.6190476  0.1850197  1.05307556 0.0018927

# One-way ANOVA of the sleep efficiency across the exercise groups.
# supernova() prints the ANOVA table, including F, PRE and the p-value.
SleepEffAnova <- aov(Sleep_Efficiency ~ Exercise_Group, data = MergedWalkerData)
supernova(SleepEffAnova)

##  Analysis of Variance Table (Type III SS)
##  Model: Sleep_Efficiency ~ Exercise_Group
## 
##                                SS df      MS     F   PRE     p
##  ----- --------------- | -------- -- ------- ----- ----- -----
##  Model (error reduced) |  540.400  3 180.133 5.925 .1782 .0010
##  Error (from model)    | 2492.939 82  30.402                  
##  ----- --------------- | -------- -- ------- ----- ----- -----
##  Total (empty model)   | 3033.339 85  35.686

# Tukey HSD post-hoc test: all pairwise exercise-group comparisons for the
# sleep-efficiency model, with family-wise adjusted p-values.
TukeyHSD(SleepEffAnova)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Sleep_Efficiency ~ Exercise_Group, data = MergedWalkerData)
## 
## $Exercise_Group
##                              diff        lwr         upr     p adj
## Cardio+Weights-Cardio   1.3871636  -2.977172  5.75149915 0.8383629
## None-Cardio            -4.3761905  -8.838613  0.08623232 0.0566544
## Weights-Cardio         -3.9904762  -8.452899  0.47194661 0.0962888
## None-Cardio+Weights    -5.7633540 -10.127690 -1.39901844 0.0046379
## Weights-Cardio+Weights -5.3776398  -9.741975 -1.01330416 0.0094267
## Weights-None            0.3857143  -4.076709  4.84813708 0.9958617

For the overall sleep difference ANOVA, F(3, 82) = 15.72, p < .001, PRE = .37. This means that the independent variable (exercise) has a significant impact on sleep difference and explains around 37% of the variation in sleep difference. This is a very large effect size. Moreover, all of the pairwise comparisons are significant, other than “Cardio + Weights vs. Cardio” and “Weights vs. Cardio + Weights.” The implication of these data is that cardio is the most impactful, though weights are still far more impactful than nothing.

For the overall sleep efficiency ANOVA, F(3, 82) = 5.93, p = .001, PRE = .18. This means that the independent variable (exercise) had a significant impact on sleep efficiency and explains around 18% of the variation in sleep efficiency. This is a relatively large effect size. That being said, the only two significant group comparisons were “None vs. Cardio + Weights” and “Weights vs. Cardio + Weights.” “None vs. Cardio” was very close to being significant, though (p = .06), and “Weights vs. Cardio” was marginal (p = .09).

Synthesis & Recommendation

If I had to recommend one exercise regime, I would recommend “Cardio and Weights.” It is the only combination that is significantly better than nothing in regards to both sleep efficiency and duration. Moreover, “Cardio and Weights” outperforms “Weights” in regards to sleep efficiency. Taken together, this is the most effective regime. That being said, the evidence suggests that any type of exercise is beneficial in regards to sleep. So, I would recommend (practically speaking) that individuals engage with whatever type of exercise they are most able to incorporate into their routines, even if “Cardio and Weights” together is not a realistic goal for them.

Reflection

To me, the most challenging aspect of this was cleaning the data. At first, I had trouble making the pre-sleep and post-sleep data numeric without inadvertently erasing the data. I had to find some code I had never seen before online in order to extract the numbers and resolve this issue. Overall, though, I feel confident about the steps I needed to take and how to troubleshoot the issues I encountered. Even though I found the midterm easier, I felt that it was a very good foundation for tackling the slightly more complex issues in this assignment. Honestly, if I were to “redo this analysis” in the future, I would probably try to learn more about the sleep research beforehand; I don’t necessarily feel that I know enough about what “sleep efficiency” is or the practical effects of losing/gaining a small amount of sleep to make particularly meaningful recommendations based on the data. The fact that the results in this experiment were significant statistically does not mean that they are significant practically—and, in general, I feel that it’s very important to have a more holistic understanding of an experiment, in addition to being able to analyze the objective data accurately.

Midterm Assignment

Jonah Dratfield

2025-10-17