knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(readxl)
library(janitor)
library(rstatix)
library(ggplot2)
library(supernova)
library(emmeans)
library(knitr)
library(kableExtra)
# Load both worksheets of the midterm workbook ----
excel_file <- "midterm_sleep_exercise.xlsx"

# List the worksheet names so the sheet arguments below can be checked.
sheets <- excel_sheets(excel_file)
sheets
## [1] "participant_info_midterm" "sleep_data_midterm"

# Read each sheet and pass it through janitor::clean_names() so the column
# names are consistent before renaming and joining.
participants_info_midterm <- clean_names(
  read_excel(excel_file, sheet = "participant_info_midterm")
)
sleep_data_midterm <- clean_names(
  read_excel(excel_file, sheet = "sleep_data_midterm")
)

# Structure check: note the inconsistent labels in exercise_group / sex and
# that pre_sleep arrives as text with a word prefix (e.g. "zzz-5.8").
participants_info_midterm %>% glimpse()
## Rows: 100
## Columns: 4
## $ id <chr> "P001", "P002", "P003", "P004", "P005", "P006", "P007",…
## $ exercise_group <chr> "NONE", "Nonee", "None", "None", "None", "None", "None"…
## $ sex <chr> "Male", "Malee", "Female", "Female", "Male", "Female", …
## $ age <dbl> 35, 57, 26, 29, 33, 33, 32, 30, 37, 28, 30, 20, 42, 31,…
sleep_data_midterm %>% glimpse()
## Rows: 100
## Columns: 4
## $ id <chr> "P001", "P002", "P003", "P004", "P005", "P006", "P007…
## $ pre_sleep <chr> "zzz-5.8", "Sleep-6.6", NA, "SLEEP-7.2", "score-7.4",…
## $ post_sleep <dbl> 4.7, 7.4, 6.2, 7.3, 7.4, 7.1, 6.7, 9.0, 5.1, 6.3, 6.2…
## $ sleep_efficiency <dbl> 81.6, 75.7, 82.9, 83.6, 83.5, 88.5, 83.6, 73.4, 88.2,…

# Column names of each table; both share the key column "id".
names(participants_info_midterm)
## [1] "id" "exercise_group" "sex" "age"
names(sleep_data_midterm)
## [1] "id" "pre_sleep" "post_sleep" "sleep_efficiency"
# Standardise participant labels, then merge with the sleep data ----
#
# The raw sheet contains misspelled and abbreviated labels (e.g. "Malee",
# "Nonee", "N", "C+W", "Weightz"). Exact-match lookups let those through as
# separate categories, which splits the exercise groups into many tiny groups
# and distorts every group-level statistic downstream. The helpers below match
# on patterns instead, so each variant collapses into its intended category.
# NOTE: printed output further down was produced before this fix; re-knit the
# report to refresh it.

# Map free-text sex labels to "Male" / "Female".
# Anything starting with "m" or "f" (case-insensitive, whitespace-trimmed) is
# recognised; unrecognised values are kept in title case, NA stays NA.
standardise_sex <- function(x) {
  key <- str_to_lower(str_squish(x))
  case_when(
    str_detect(key, "^m") ~ "Male",
    str_detect(key, "^f") ~ "Female",
    TRUE ~ str_to_title(x)
  )
}

# Map free-text exercise labels to "Aerobic", "Resistance", "Control" or
# "Cardio+Weights". Unrecognised values are kept in title case, NA stays NA.
standardise_exercise_group <- function(x) {
  key <- str_to_lower(str_squish(x))
  case_when(
    # Combined regimen must be tested first: "cardio+weights" would otherwise
    # match the cardio-only rule.
    str_detect(key, "\\+|^cw$|cardio.*weight") ~ "Cardio+Weights",
    # NOTE(review): a bare "c" is assumed to abbreviate "Cardio" (the control
    # label in the raw data is "None"/"N") -- confirm against the codebook.
    str_detect(key, "^(c$|cardio|aerobic)") ~ "Aerobic",
    str_detect(key, "^(w$|weight|resist|strength)") ~ "Resistance",
    str_detect(key, "^(n$|non|no[_ ]?exercise|control)") ~ "Control",
    TRUE ~ str_to_title(x)
  )
}

participants_info_midterm <- participants_info_midterm %>%
  rename(participant_id = id) %>% # align key name with the sleep table
  mutate(
    sex = standardise_sex(sex),
    exercise_group = standardise_exercise_group(exercise_group)
  )

sleep_data_midterm <- sleep_data_midterm %>%
  rename(participant_id = id)

# Keep every participant; sleep columns are NA where no measurement exists.
sleep_merged <- left_join(
  participants_info_midterm,
  sleep_data_midterm,
  by = "participant_id"
)
glimpse(sleep_merged)
## Rows: 100
## Columns: 7
## $ participant_id <chr> "P001", "P002", "P003", "P004", "P005", "P006", "P007…
## $ exercise_group <chr> "Control", "Nonee", "Control", "Control", "Control", …
## $ sex <chr> "Male", "Malee", "Female", "Female", "Male", "Female"…
## $ age <dbl> 35, 57, 26, 29, 33, 33, 32, 30, 37, 28, 30, 20, 42, 3…
## $ pre_sleep <chr> "zzz-5.8", "Sleep-6.6", NA, "SLEEP-7.2", "score-7.4",…
## $ post_sleep <dbl> 4.7, 7.4, 6.2, 7.3, 7.4, 7.1, 6.7, 9.0, 5.1, 6.3, 6.2…
## $ sleep_efficiency <dbl> 81.6, 75.7, 82.9, 83.6, 83.5, 88.5, 83.6, 73.4, 88.2,…
# Derive numeric sleep scores, the pre/post change, and a two-level age group.
sleep_merged <- mutate(
  sleep_merged,
  # Keep only the first run of digits (with optional decimal part). The word
  # prefix and its hyphen separator (e.g. "zzz-5.8") are discarded, so the
  # hyphen is never read as a minus sign.
  pre_sleep_num = pre_sleep %>%
    as.character() %>%
    str_extract("[0-9]+\\.?[0-9]*") %>%
    as.numeric(),
  post_sleep_num = post_sleep %>%
    as.character() %>%
    str_extract("[0-9]+\\.?[0-9]*") %>%
    as.numeric(),
  # Positive values mean more sleep after the intervention.
  sleep_difference = post_sleep_num - pre_sleep_num,
  # A missing age propagates to a missing age group.
  agegroup2 = if_else(age < 40, "Under40", "40plus")
)

# How many participants have no usable difference score?
sum(is.na(pull(sleep_merged, sleep_difference)))
## [1] 14

# Analyse complete cases only.
sleep_merged <- drop_na(sleep_merged, sleep_difference)
sleep_merged %>% glimpse()
## Rows: 86
## Columns: 11
## $ participant_id <chr> "P001", "P002", "P004", "P005", "P006", "P007", "P008…
## $ exercise_group <chr> "Control", "Nonee", "Control", "Control", "Control", …
## $ sex <chr> "Male", "Malee", "Female", "Male", "Female", "Male", …
## $ age <dbl> 35, 57, 29, 33, 33, 32, 30, 37, 28, 30, 20, 42, 33, 2…
## $ pre_sleep <chr> "zzz-5.8", "Sleep-6.6", "SLEEP-7.2", "score-7.4", "Sl…
## $ post_sleep <dbl> 4.7, 7.4, 7.3, 7.4, 7.1, 6.7, 9.0, 5.1, 6.3, 6.2, 4.6…
## $ sleep_efficiency <dbl> 81.6, 75.7, 83.6, 83.5, 88.5, 83.6, 73.4, 88.2, 80.4,…
## $ pre_sleep_num <dbl> 5.8, 6.6, 7.2, 7.4, 6.6, 6.0, 8.1, 5.5, 5.7, 7.0, 5.5…
## $ post_sleep_num <dbl> 4.7, 7.4, 7.3, 7.4, 7.1, 6.7, 9.0, 5.1, 6.3, 6.2, 4.6…
## $ sleep_difference <dbl> -1.1, 0.8, 0.1, 0.0, 0.5, 0.7, 0.9, -0.4, 0.6, -0.8, …
## $ agegroup2 <chr> "Under40", "40plus", "Under40", "Under40", "Under40",…
# Overall descriptive statistics ----
# One-row summary (mean, SD, min, max) of the two outcomes across all
# participants that remain after dropping missing difference scores.
desc_overall <- sleep_merged %>%
  summarise(
    mean_diff = mean(sleep_difference, na.rm = TRUE),
    sd_diff = sd(sleep_difference, na.rm = TRUE),
    min_diff = min(sleep_difference, na.rm = TRUE),
    max_diff = max(sleep_difference, na.rm = TRUE),
    mean_eff = mean(sleep_efficiency, na.rm = TRUE),
    sd_eff = sd(sleep_efficiency, na.rm = TRUE),
    min_eff = min(sleep_efficiency, na.rm = TRUE),
    max_eff = max(sleep_efficiency, na.rm = TRUE)
  )

# NOTE(review): the rendered table reports max_eff = 101.5, which is above
# 100 % -- confirm whether that observation is a data-entry error.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
kable(desc_overall, caption = "Overall Descriptive Statistics") %>%
  kable_styling(full_width = FALSE)
| mean_diff | sd_diff | min_diff | max_diff | mean_eff | sd_eff | min_eff | max_eff |
|---|---|---|---|---|---|---|---|
| 0.6825581 | 0.6610494 | -1.1 | 2.1 | 83.77558 | 5.973804 | 71.7 | 101.5 |
# Descriptive statistics by exercise group ----
# One row per exercise_group label with the mean and SD of both outcomes.
# sd() returns NA for any group that contains a single participant.
desc_group <- sleep_merged %>%
  group_by(exercise_group) %>%
  summarise(
    mean_diff = mean(sleep_difference, na.rm = TRUE),
    sd_diff = sd(sleep_difference, na.rm = TRUE),
    mean_eff = mean(sleep_efficiency, na.rm = TRUE),
    sd_eff = sd(sleep_efficiency, na.rm = TRUE)
  )

# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
kable(desc_group, caption = "Descriptive Statistics by Exercise Group") %>%
  kable_styling(full_width = FALSE)
| exercise_group | mean_diff | sd_diff | mean_eff | sd_eff |
|---|---|---|---|---|
| Aerobic | 1.1400000 | 0.4977846 | 85.42000 | 6.1459101 |
| C | 1.1000000 | NA | 86.00000 | NA |
| C+W | 1.1000000 | 0.1414214 | 90.05000 | 5.3033009 |
| Cardio+Weights | 0.8250000 | 0.3971941 | 86.32500 | 6.1500428 |
| Control | -0.0222222 | 0.6254933 | 81.34444 | 5.8711881 |
| Cw | 1.1000000 | NA | 90.60000 | NA |
| N | 0.3000000 | 0.8485281 | 81.30000 | 0.2828427 |
| Nonee | 0.8000000 | NA | 75.70000 | NA |
| Resistance | 0.7157895 | 0.6238440 | 81.31053 | 4.4438340 |
| Weightsss | 0.1000000 | NA | 85.30000 | NA |
| Weightz | 0.3000000 | NA | 80.40000 | NA |
# Figure 1 -- distribution of the pre/post sleep change within each group.
sleep_merged %>%
  ggplot(aes(exercise_group, sleep_difference)) +
  geom_boxplot(fill = "skyblue") +
  theme_minimal(base_size = 13) +
  labs(
    title = "Sleep Difference by Exercise Group",
    x = "Exercise Group",
    y = "Change in Sleep Duration (hrs)"
  )

# Figure 2 -- distribution of sleep efficiency within each group.
sleep_merged %>%
  ggplot(aes(exercise_group, sleep_efficiency)) +
  geom_boxplot(fill = "tan") +
  theme_minimal(base_size = 13) +
  labs(
    title = "Sleep Efficiency by Exercise Group",
    x = "Exercise Group",
    y = "Sleep Efficiency (%)"
  )

# Figure 3 -- sleep change against efficiency, with one linear trend line per
# exercise group (the colour aesthetic also defines the smoothing groups).
sleep_merged %>%
  ggplot(aes(sleep_efficiency, sleep_difference, colour = exercise_group)) +
  geom_point(size = 2, alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal(base_size = 13) +
  labs(
    title = "Relationship Between Sleep Efficiency and Sleep Difference",
    x = "Sleep Efficiency (%)",
    y = "Sleep Difference (hrs)"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Collapse every sex label to "Male"/"Female" by its first letter (so typos
# such as "Malee" are caught), then discard rows whose label is neither.
sleep_merged <- sleep_merged %>%
  mutate(
    sex = case_when(
      str_starts(str_to_lower(sex), "m") ~ "Male",
      str_starts(str_to_lower(sex), "f") ~ "Female",
      TRUE ~ NA_character_
    )
  ) %>%
  drop_na(sex)

# Group sizes going into the t-test.
table(sleep_merged[["sex"]])
##
## Female Male
## 49 37
# Two-sample t-tests on the sleep change: by sex and by age group.
# The fractional df in the printed results are consistent with Welch's
# unequal-variance test.
t_sex <- t_test(sleep_merged, sleep_difference ~ sex)
t_age <- t_test(sleep_merged, sleep_difference ~ agegroup2)

t_sex
## # A tibble: 1 × 8
## .y. group1 group2 n1 n2 statistic df p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 sleep_difference Female Male 49 37 1.58 77.6 0.118
t_age
## # A tibble: 1 × 8
## .y. group1 group2 n1 n2 statistic df p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 sleep_difference 40plus Under40 19 67 1.37 36.7 0.178
# ANOVA: Sleep Difference ~ Exercise Group
# One-way ANOVA testing whether the mean pre/post sleep change differs between
# exercise_group labels. The model df equals (number of distinct labels - 1),
# so it is a quick check on how many groups actually entered the model.
anova_diff <- aov(sleep_difference ~ exercise_group, data = sleep_merged)
summary(anova_diff)
## Df Sum Sq Mean Sq F value Pr(>F)
## exercise_group 10 15.04 1.5042 5.104 1.18e-05 ***
## Residuals 75 22.10 0.2947
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
supernova(anova_diff) # effect size: PRE column (= SS Model / SS Total; equals eta squared for this one-factor model)
## Analysis of Variance Table (Type III SS)
## Model: sleep_difference ~ exercise_group
##
## SS df MS F PRE p
## ----- --------------- | ------ -- ----- ----- ----- -----
## Model (error reduced) | 15.042 10 1.504 5.104 .4050 .0000
## Error (from model) | 22.102 75 0.295
## ----- --------------- | ------ -- ----- ----- ----- -----
## Total (empty model) | 37.144 85 0.437
# Tukey post-hoc
# All pairwise group comparisons with family-wise 95% confidence intervals;
# a pair differs significantly when its interval excludes 0 (p adj < .05).
tukey_diff <- TukeyHSD(anova_diff)
tukey_diff
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = sleep_difference ~ exercise_group, data = sleep_merged)
##
## $exercise_group
## diff lwr upr p adj
## C-Aerobic -4.000000e-02 -1.8868172 1.8068172 1.0000000
## C+W-Aerobic -4.000000e-02 -1.3766282 1.2966282 1.0000000
## Cardio+Weights-Aerobic -3.150000e-01 -0.8849402 0.2549402 0.7556344
## Control-Aerobic -1.162222e+00 -1.7477801 -0.5766644 0.0000003
## Cw-Aerobic -4.000000e-02 -1.8868172 1.8068172 1.0000000
## N-Aerobic -8.400000e-01 -2.1766282 0.4966282 0.5903541
## Nonee-Aerobic -3.400000e-01 -2.1868172 1.5068172 0.9999323
## Resistance-Aerobic -4.242105e-01 -1.0016012 0.1531802 0.3568154
## Weightsss-Aerobic -1.040000e+00 -2.8868172 0.8068172 0.7344003
## Weightz-Aerobic -8.400000e-01 -2.6868172 1.0068172 0.9118975
## C+W-C 6.661338e-16 -2.2073688 2.2073688 1.0000000
## Cardio+Weights-C -2.750000e-01 -2.1218172 1.5718172 0.9999907
## Control-C -1.122222e+00 -2.9739187 0.7294743 0.6412635
## Cw-C 8.881784e-16 -2.5488499 2.5488499 1.0000000
## N-C -8.000000e-01 -3.0073688 1.4073688 0.9802381
## Nonee-C -3.000000e-01 -2.8488499 2.2488499 0.9999990
## Resistance-C -3.842105e-01 -2.2333406 1.4649195 0.9997965
## Weightsss-C -1.000000e+00 -3.5488499 1.5488499 0.9655909
## Weightz-C -8.000000e-01 -3.3488499 1.7488499 0.9932982
## Cardio+Weights-C+W -2.750000e-01 -1.6116282 1.0616282 0.9998138
## Control-C+W -1.122222e+00 -2.4655841 0.2211396 0.1887353
## Cw-C+W 2.220446e-16 -2.2073688 2.2073688 1.0000000
## N-C+W -8.000000e-01 -2.6023090 1.0023090 0.9238680
## Nonee-C+W -3.000000e-01 -2.5073688 1.9073688 0.9999961
## Resistance-C+W -3.842105e-01 -1.7240325 0.9556115 0.9967429
## Weightsss-C+W -1.000000e+00 -3.2073688 1.2073688 0.9139514
## Weightz-C+W -8.000000e-01 -3.0073688 1.4073688 0.9802381
## Control-Cardio+Weights -8.472222e-01 -1.4327801 -0.2616644 0.0003859
## Cw-Cardio+Weights 2.750000e-01 -1.5718172 2.1218172 0.9999907
## N-Cardio+Weights -5.250000e-01 -1.8616282 0.8116282 0.9653250
## Nonee-Cardio+Weights -2.500000e-02 -1.8718172 1.8218172 1.0000000
## Resistance-Cardio+Weights -1.092105e-01 -0.6866012 0.4681802 0.9999132
## Weightsss-Cardio+Weights -7.250000e-01 -2.5718172 1.1218172 0.9654518
## Weightz-Cardio+Weights -5.250000e-01 -2.3718172 1.3218172 0.9969667
## Cw-Control 1.122222e+00 -0.7294743 2.9739187 0.6412635
## N-Control 3.222222e-01 -1.0211396 1.6655841 0.9992763
## Nonee-Control 8.222222e-01 -1.0294743 2.6739187 0.9236991
## Resistance-Control 7.380117e-01 0.1451996 1.3308238 0.0041643
## Weightsss-Control 1.222222e-01 -1.7294743 1.9739187 1.0000000
## Weightz-Control 3.222222e-01 -1.5294743 2.1739187 0.9999598
## N-Cw -8.000000e-01 -3.0073688 1.4073688 0.9802381
## Nonee-Cw -3.000000e-01 -2.8488499 2.2488499 0.9999990
## Resistance-Cw -3.842105e-01 -2.2333406 1.4649195 0.9997965
## Weightsss-Cw -1.000000e+00 -3.5488499 1.5488499 0.9655909
## Weightz-Cw -8.000000e-01 -3.3488499 1.7488499 0.9932982
## Nonee-N 5.000000e-01 -1.7073688 2.7073688 0.9995612
## Resistance-N 4.157895e-01 -0.9240325 1.7556115 0.9938679
## Weightsss-N -2.000000e-01 -2.4073688 2.0073688 0.9999999
## Weightz-N -6.661338e-16 -2.2073688 2.2073688 1.0000000
## Resistance-Nonee -8.421053e-02 -1.9333406 1.7649195 1.0000000
## Weightsss-Nonee -7.000000e-01 -3.2488499 1.8488499 0.9977168
## Weightz-Nonee -5.000000e-01 -3.0488499 2.0488499 0.9998790
## Weightsss-Resistance -6.157895e-01 -2.4649195 1.2333406 0.9894254
## Weightz-Resistance -4.157895e-01 -2.2649195 1.4333406 0.9995887
## Weightz-Weightsss 2.000000e-01 -2.3488499 2.7488499 1.0000000
# ANOVA: Sleep Efficiency ~ Exercise Group
# One-way ANOVA testing whether mean sleep efficiency differs between
# exercise_group labels (same predictor and data as the sleep-difference model).
anova_eff <- aov(sleep_efficiency ~ exercise_group, data = sleep_merged)
summary(anova_eff)
## Df Sum Sq Mean Sq F value Pr(>F)
## exercise_group 10 627.4 62.74 1.956 0.0505 .
## Residuals 75 2406.0 32.08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
supernova(anova_eff) # effect size: PRE column (= SS Model / SS Total; equals eta squared for this one-factor model)
## Analysis of Variance Table (Type III SS)
## Model: sleep_efficiency ~ exercise_group
##
## SS df MS F PRE p
## ----- --------------- | -------- -- ------ ----- ----- -----
## Model (error reduced) | 627.362 10 62.736 1.956 .2068 .0505
## Error (from model) | 2405.977 75 32.080
## ----- --------------- | -------- -- ------ ----- ----- -----
## Total (empty model) | 3033.339 85 35.686
# Tukey post-hoc
# All pairwise group comparisons with family-wise 95% confidence intervals.
# The omnibus test above is not significant at .05 (p = .0505), so these
# comparisons are descriptive only.
tukey_eff <- TukeyHSD(anova_eff)
tukey_eff
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = sleep_efficiency ~ exercise_group, data = sleep_merged)
##
## $exercise_group
## diff lwr upr p adj
## C-Aerobic 0.58000000 -18.688818 19.848818 1.0000000
## C+W-Aerobic 4.63000000 -9.315747 18.575747 0.9896674
## Cardio+Weights-Aerobic 0.90500000 -5.041486 6.851486 0.9999885
## Control-Aerobic -4.07555556 -10.184990 2.033878 0.5020226
## Cw-Aerobic 5.18000000 -14.088818 24.448818 0.9980888
## N-Aerobic -4.12000000 -18.065747 9.825747 0.9958546
## Nonee-Aerobic -9.72000000 -28.988818 9.548818 0.8433329
## Resistance-Aerobic -4.10947368 -10.133695 1.914748 0.4681875
## Weightsss-Aerobic -0.12000000 -19.388818 19.148818 1.0000000
## Weightz-Aerobic -5.02000000 -24.288818 14.248818 0.9985317
## C+W-C 4.05000000 -18.980643 27.080643 0.9999557
## Cardio+Weights-C 0.32500000 -18.943818 19.593818 1.0000000
## Control-C -4.65555556 -23.975282 14.664171 0.9992467
## Cw-C 4.60000000 -21.993496 31.193496 0.9999619
## N-C -4.70000000 -27.730643 18.330643 0.9998269
## Nonee-C -10.30000000 -36.893496 16.293496 0.9684849
## Resistance-C -4.68947368 -23.982423 14.603476 0.9991883
## Weightsss-C -0.70000000 -27.293496 25.893496 1.0000000
## Weightz-C -5.60000000 -32.193496 20.993496 0.9997705
## Cardio+Weights-C+W -3.72500000 -17.670747 10.220747 0.9981888
## Control-C+W -8.70555556 -22.721558 5.310447 0.6071089
## Cw-C+W 0.55000000 -22.480643 23.580643 1.0000000
## N-C+W -8.75000000 -27.554441 10.054441 0.8993773
## Nonee-C+W -14.35000000 -37.380643 8.680643 0.6026187
## Resistance-C+W -8.73947368 -22.718544 5.239596 0.5978212
## Weightsss-C+W -4.75000000 -27.780643 18.280643 0.9998096
## Weightz-C+W -9.65000000 -32.680643 13.380643 0.9468759
## Control-Cardio+Weights -4.98055556 -11.089990 1.128878 0.2169107
## Cw-Cardio+Weights 4.27500000 -14.993818 23.543818 0.9996347
## N-Cardio+Weights -5.02500000 -18.970747 8.920747 0.9810476
## Nonee-Cardio+Weights -10.62500000 -29.893818 8.643818 0.7581905
## Resistance-Cardio+Weights -5.01447368 -11.038695 1.009748 0.1927494
## Weightsss-Cardio+Weights -1.02500000 -20.293818 18.243818 1.0000000
## Weightz-Cardio+Weights -5.92500000 -25.193818 13.343818 0.9942966
## Cw-Control 9.25555556 -10.064171 28.575282 0.8813325
## N-Control -0.04444444 -14.060447 13.971558 1.0000000
## Nonee-Control -5.64444444 -24.964171 13.675282 0.9962106
## Resistance-Control -0.03391813 -6.219040 6.151203 1.0000000
## Weightsss-Control 3.95555556 -15.364171 23.275282 0.9998218
## Weightz-Control -0.94444444 -20.264171 18.375282 1.0000000
## N-Cw -9.30000000 -32.330643 13.730643 0.9582503
## Nonee-Cw -14.90000000 -41.493496 11.693496 0.7402504
## Resistance-Cw -9.28947368 -28.582423 10.003476 0.8779657
## Weightsss-Cw -5.30000000 -31.893496 21.293496 0.9998603
## Weightz-Cw -10.20000000 -36.793496 16.393496 0.9705311
## Nonee-N -5.60000000 -28.630643 17.430643 0.9991857
## Resistance-N 0.01052632 -13.968544 13.989596 1.0000000
## Weightsss-N 4.00000000 -19.030643 27.030643 0.9999605
## Weightz-N -0.90000000 -23.930643 22.130643 1.0000000
## Resistance-Nonee 5.61052632 -13.682423 24.903476 0.9963503
## Weightsss-Nonee 9.60000000 -16.993496 36.193496 0.9807936
## Weightz-Nonee 4.70000000 -21.893496 31.293496 0.9999535
## Weightsss-Resistance 3.98947368 -15.303476 23.282423 0.9998051
## Weightz-Resistance -0.91052632 -20.203476 18.382423 1.0000000
## Weightz-Weightsss -4.90000000 -31.493496 21.693496 0.9999318
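# Interpretation:
# 1. Change in sleep duration differed by exercise group (F(10, 75) = 5.10, p < .001, PRE = .41): Aerobic, Cardio+Weights, and Resistance each improved significantly more than Control (Tukey p adj < .01).
# 2. Resistance training differed significantly from Control (p adj = .004) but not from Aerobic (p adj = .36); no exercise regimen differed significantly from another.
# 3. Sleep efficiency did not differ significantly between groups (F(10, 75) = 1.96, p = .0505; no significant Tukey pairs), so the results support a benefit of exercise for sleep duration but not for sleep efficiency.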
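Based on both outcomes, I recommend the Aerobic regimen, which showed the largest mean improvement in sleep duration among the main exercise groups (M = 1.14 hrs). Statistical results (ANOVA F-values, p-values, and post-hoc tests) supported that Aerobic produced significantly better sleep-duration improvements than the control group (Tukey p adj < .001), although it did not differ significantly from the Cardio+Weights or Resistance groups, and group differences in sleep efficiency were not statistically significant (p = .0505). Overall, this regimen appears most beneficial for improving sleep duration, while the evidence for an improvement in sleep efficiency is inconclusive.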
The midterm was a helpful opportunity to practice data cleaning, visualization, and inferential analysis. I felt most confident performing the t-tests and plotting results. However, at first, I struggled with merging and label standardization. In the future, I will take more steps when cleaning the data and make sure that all file paths are set properly to avoid import errors.