library(tidyverse)
library(gplots)
library(viridis)
library(RcppRoll)
library(RColorBrewer)
library(scales)
library(mgc)
library(lme4)
library(performance)
library(Hmisc)
library(insight)
setwd('/Users/mike.xiao/Documents/Projects/actigraphy_paper')
#activity
act10 <- read_csv('datasets/HBN_actigraphy_600s.csv')
act10filtered <- read_csv('datasets/HBN_actigraphy_600s_95wear_3day.csv')
part2_summary <- read_csv('datasets/part2_summary.csv')
part4_summary <- read_csv('datasets/part4_summary_sleep_cleaned.csv')
part5_summary <- read_csv('datasets/part5_personsummary_MM_L30M100V400_T5A5.csv')
part7_summary <- read_csv('datasets/part7b_HBN_all_features_3_subject.csv')
part7_daysummary <- read_csv('datasets/part7b_HBN_all_features_2_dayclean.csv')
validwear <- read_csv('datasets/valid_wear.csv')
#phenotypic variables
HBN_demo <- read.csv('datasets/pheno/Basic_Demos.csv', stringsAsFactors = F)
HBN_coversheet <- read.csv('datasets/pheno/External_CoverSheet.csv', stringsAsFactors = F)
HBN_IAT <- read.csv('datasets/pheno/IAT.csv', stringsAsFactors = F)
#create ID column from filename
part2_summary <- part2_summary %>%
mutate(ID = str_replace(filename, '.csv', ''))
#GGIR summaries
part_25_summary <- left_join(part5_summary, part2_summary, by = 'ID')
#create cumulative minute variable
act10 <- act10 %>%
mutate(timeindex = 1440 * (day - 1 ) + minute,
hour = str_pad(floor(minute/60), width = 2, pad = "0"),
min = str_pad(minute %% 60, width = 2, pad = "0"),
time = paste0(hour, ":", min))
#quick sanity check on the 10min epoch level data
act10 %>% filter(ID == unique(act10$ID)[[1]] & timeindex < 10080) %>%
ggplot(aes(x = timeindex, y = ENMO)) +
geom_line() +
scale_x_continuous(breaks = seq(0, 10080, by = 360), labels = c(rep(c('12am', '6am', '12pm', '6pm'), 7), '12am'))
We chose ENMO as the activity summary metric, the unit is acceleration in g. ENMO stands for Euclidian Norm of the x, y, z axis accelerations Minus One g to account for gravity.
Key features from GGIR Part5 participant level summaries used to cluster subjects, these metrics are first calculated at the day level and then averaged across all days for a participant:
| Variable name | Description |
|---|---|
| wear_dur_def_proto_day | device recording duration |
| AD_L5_ENMO_mg_0-24hr | Average of ENMO during least active five hours in the day |
| AD_M5_ENMO_mg_0-24hr | Average of ENMO during most active five hours in the day |
| AD_mean_ENMO_mg_0-24hr | Average ENMO for the day, averaged across all days |
| AD_mean_ENMO_mg_1-6am | Average ENMO from 1-6am for the day, averaged across all days |
| AD_p95.83333_ENMO_mg_0-24hr | Average of the 95.83 percentile cutoff for ENMO each day |
| IS_interdailystability | Variability of ENMO between days within the participant |
| IV_intradailyvariability | Average variability of ENMO within each day |
| nonwear_perc_day_pla | Average percentage of the day flagged as nonwear |
| dur_day_total_IN_min_pla | Average duration spent in sedentary activity (<30mg) outside of sleep window |
| dur_day_total_LIG_min_pla | Average duration spent in light activity (30mg-100mg) outside of sleep window |
| dur_day_total_MOD_min_pla | Average duration spent in moderate activity (100-400mg) outside of sleep window |
| dur_day_total_VIG_min_pla | Average duration spent in vigorous activity (>400mg) outside of sleep window |
| Nbouts_day_MVPA_bts_10_pla | Average number of bouts of MVPA (>100mg) lasting at least 10 minutes outside of sleep window |
| dur_day_MVPA_bts_10_min_pla | Average duration in minutes of time spent in MVPA (>100mg) bouts lasting at least 10 minutes outside of sleep window |
| Nbouts_day_IN_bts_10_20_pla | Average number of bouts sedentary activity (<30mg) lasting between 10-20min within a day outside of sleep window |
| dur_spt_min_pla | Average duration of time spent asleep within the sleep window |
| wakeup_pla | Average wake time |
| sleeponset_pla | Average sleep onset time |
#hclust with key features:
clust_features <- part_25_summary %>%
select(ID, wear_dur_def_proto_day, `AD_L5_ENMO_mg_0-24hr`, `AD_M5_ENMO_mg_0-24hr`,
`AD_mean_ENMO_mg_0-24hr`, `AD_mean_ENMO_mg_1-6am`, `AD_p95.83333_ENMO_mg_0-24hr`,
IS_interdailystability,
IV_intradailyvariability, dur_day_MVPA_bts_10_min_pla,
nonwear_perc_day_pla, dur_day_total_IN_min_pla, dur_day_total_LIG_min_pla,
dur_day_total_MOD_min_pla, dur_day_total_VIG_min_pla, Nbouts_day_MVPA_bts_10_pla,
Nbouts_day_IN_bts_10_20_pla, dur_spt_min_pla,
wakeup_pla, sleeponset_pla)
n_sub = dim(clust_features)[1]
# two way clustering
subjectID <- clust_features$ID
#remove ID column
clust_features <- clust_features %>% select(-ID, )
# scale the data
clust_features_scale = scale(clust_features)
### Note, when plot in heatmap, the order of the subjects are reversed. The number of clusters/thresuld can be addjusted in the mycl and mycl2 row.
hr <- hclust(as.dist(1-cor(t(clust_features_scale), method="pearson", use = 'complete.obs')), method="ward.D2") # Cluster rows by Pearson correlation.
hc <- hclust(as.dist(1-cor(clust_features_scale, method="pearson", use = 'complete.obs')), method="ward.D2")
mycl <- cutree(hr, h=max(hr$height)/6); mycolhc <- rainbow(length(unique(mycl)), start=0.1, end=0.9);
mycolhc <- mycolhc[as.vector(mycl)]
mycl2 <- cutree(hc, h=max(hr$height)/6); mycolhr <- rainbow(length(unique(mycl2)), start=0.1, end=0.9);
mycolhr <- mycolhr[as.vector(mycl2)]
#2way cluster heatmap
heatmap.2(clust_features_scale,
Rowv=as.dendrogram(hr),
Colv=as.dendrogram(hc),
col = inferno(256),
RowSideColors=mycolhc,
scale="row",
trace = 'none',
ColSideColors = mycolhr,
cexCol = 0.3,
cexRow = 0.3,
margins = c(8,5))
#get subject order
suborder <- rev(hr$order)
ID_ordered <- subjectID[suborder]
#40 day heatmap
#smooth the 10 minute dataset over 6 10 minute windows (1hr), cap max ENMO at 400 to facilitate visuals
act_10min_smooth <- act10 %>%
group_by(ID) %>%
mutate(
ENMO = ENMO*1000,
enmo_smooth = roll_mean(ENMO, 6, na.rm=TRUE, align="center", fill = NA),
enmo_smooth = ifelse(is.na(enmo_smooth), NA,
ifelse(enmo_smooth > 400, 400, enmo_smooth))) %>%
ungroup()
#create 40 day full shell dataset
fulldays <- tibble(ID = rep(subjectID, each = 40*144),
day = rep(rep(1:40, each = 144), length(subjectID)),
minute = rep(seq(10, 1440, by = 10), 40*length(subjectID)))
act_10min_smooth_full <- left_join(fulldays, act_10min_smooth)
act_10min_smooth_full <- act_10min_smooth_full %>%
mutate(timeindex = 1440 * (day - 1 ) + minute)
#customizing color scale for plot
rescale(c(0,30,60,100,250,400), c(0,1))
#40 day heatmap, order on all GGIR variables hclust
p <- ggplot(act_10min_smooth_full, aes(x=timeindex, y=as.factor(ID), fill=enmo_smooth)) +
geom_tile() +
#scale_fill_distiller(palette = 'Spectral', na.value = 'black') +
#scale_fill_distiller(palette = 'Spectral', na.value = 'black', limits = c(0, 400)) +
scale_fill_gradientn(colors = c("#08306B", "#2B83BA", "#FFFFBF", "#FDAE61", "#D7191C"),
values = c(0.000, 0.075, 0.150, 0.250, 1.000), na.value = 'black',
limits = c(0, 400))+
scale_x_continuous(
breaks = seq(10, 57600, 1440),
label = c(1:40),
expand = c(0,0)
) +
scale_y_discrete(limits = ID_ordered) +
xlab('Days') +
#geom_vline(xintercept=seq(1, 5760, 144), color = 'white') +
theme_bw(base_size = 18) +
theme(panel.grid = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggsave(p,filename="plots/40day_select_GGIR.png", height=10, width = 15, dpi=200)
Figure 3: Visual Overview of ENMO Data. Here, we depict ENMO time series (10 minute resolution) depicted for each participant (in rows). Participants are ordered on the y-axis by hierarchical clustering of a subset of basic participant-level summary variables, including: days of wear, average activity level, amount of time spent in light/moderate/vigorous activity, interday-stability, sleep onset/wake time, and sleep efficiency.
filter_data <- function(wear_data, percent_wear, minimum_days) {
wearSummary <- wear_data %>%
group_by(ID) %>%
summarise(day = length(day),
wear = mean(minute_wear))
#number of subjects in the original dataset
print(paste0('Input data includes ', length(unique(wearSummary$ID)), ' subjects, ',
'averaging ', round(mean(wearSummary$day), 2), ' days per subject and ',
round(mean(wearSummary$wear), 2), ' minutes of valid wear per day.'))
#keep only days that have greater than percent_wear threshold
wear_data <- wear_data %>%
filter(minute_wear >= percent_wear * 1440)
wearSummary <- wear_data %>%
group_by(ID) %>%
summarise(day = length(day),
wear = mean(minute_wear))
#number of subjects in the dataset after filtering by minutes per day
print(paste0('Data that passed [>', percent_wear*100, '% valid wear per day] threshold includes ', length(unique(wearSummary$ID)), ' subjects, ',
'averaging ', round(mean(wearSummary$day), 2), ' days per subject and ',
round(mean(wearSummary$wear), 2), ' minutes of valid wear per day.'))
#keep only subjects with greater than minimum_days days of data after applying percent_wear threshold
validIDs <- filter(wearSummary, day > minimum_days)
validIDs <- unique(validIDs$ID)
wear_data <- wear_data %>% filter(ID %in% validIDs)
wearSummary <- wear_data %>%
group_by(ID) %>%
summarise(day = length(day),
wear = mean(minute_wear))
#number of subjects in the dataset after filtering by minutes per day
print(paste0('Data that passed [>', minimum_days, ' days] of [>', percent_wear*100, '% valid wear per day] threshold includes ', length(unique(wearSummary$ID)), ' subjects, ',
'averaging ', round(mean(wearSummary$day), 2), ' days per subject and ',
round(mean(wearSummary$wear), 2), ' minutes of valid wear per day.'))
results <- tibble(days = minimum_days,
percent = percent_wear,
n_subjects = length(unique(wearSummary$ID)))
return(results)
}
days = c(1:28)
percent = c(0.25, 0.5, 0.6, 0.75, 0.8, 0.85, 0.9, 0.95, 1)
days_percent_combo = crossing(days, percent)
plotdata <- map2(days_percent_combo$days, days_percent_combo$percent, ~filter_data(validwear, .y, .x))
plotdata <- bind_rows(plotdata)
plotdata <- plotdata %>%
mutate(percent_subs = n_subjects/1056 * 100,
percent = percent * 100,
percent = as.factor(percent))
ggplot(data = plotdata, aes(x = days, y = percent_subs, color = percent, group = percent)) +
geom_line() +
geom_point() +
geom_vline(xintercept = 21, linetype = 2, color = 'blue') +
scale_color_viridis_d() +
scale_y_continuous(breaks = seq(0, 100, 10)) +
scale_x_continuous(breaks = seq(0,28,2)) +
theme_minimal(base_size = 16) +
theme(panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank()) +
xlab('Minimum number of valid days') +
ylab('Percentage of subjects that meet data quality criteria') +
labs(color = "%wear threshold \n for valid day")
ggsave('plots/wear_time_plot.png', height = 6, width = 9)
Figure 4: Percentage of Subjects That Meet Minimum Wear Requirement Here we show the percentage of participants (y-axis) that meet the minimum wear-time requirement, defined as a function of the number of “valid days” available in the subject’s data recording (x-axis). A day is deemed a “valid day” when the number of minutes the watch was worn on that day equals or exceeds the some given minute threshold (shown as percentages in figure legend). Each line depicts the percentages of subjects in the sample that have at least X number of “valid days” at a given daily wear threshold. The average duration of wear is 14.86 days (sd=9.83 days).
#Compliance in the first 21 days (go to day 22 since the first day is always halfed)
#subset dataset to first 23 days (first day and last days are going to be short because of protocol)
validwear22 <- validwear %>% filter(day < 24)
#redo calculation
days = c(1:21)
percent = c(0.25, 0.5, 0.6, 0.75, 0.8, 0.85, 0.9, 0.95, 1)
days_percent_combo = crossing(days, percent)
plotdata2 <- map2(days_percent_combo$days, days_percent_combo$percent, ~filter_data(validwear22, .y, .x))
plotdata2 <- bind_rows(plotdata2)
plotdata2 <- plotdata2 %>%
mutate(percent_subs = n_subjects/1056 * 100,
percent = percent * 100,
percent = as.factor(percent))
ggplot(data = plotdata2, aes(x = days, y = percent_subs, color = percent, group = percent)) +
geom_line() +
geom_point() +
scale_color_viridis_d() +
scale_y_continuous(breaks = seq(0, 100, 10)) +
scale_x_continuous(breaks = seq(0,21,2), limits = c(0,21)) +
theme_minimal(base_size = 16) +
theme(panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank()) +
xlab('Minimum number of valid days') +
ylab('Percentage of subjects that meet data quality criteria') +
labs(color = "%wear threshold \n for valid day")
ggsave('plots/compliance_plot.png', height = 6, width = 9)
All calculations in this plot were only done on the first 23 days of data, unlike in Figure 4, which was calculated over all available days. Our minimum wear period is 3 weeks, so the first 23 days were used to account for the first and last days when the participant was given/was returning the watch.
#facet the plot
plotdata <- plotdata %>% mutate(group = 'All Days')
plotdata2 <- plotdata2 %>% mutate(group = 'First 3 Weeks')
plotdata_combined <- rbind(plotdata, plotdata2)
ggplot(data = plotdata_combined, aes(x = days, y = percent_subs, color = percent, group = percent)) +
geom_line() +
geom_point() +
scale_color_viridis_d() +
scale_y_continuous(breaks = seq(0, 100, 10)) +
facet_wrap(~group) +
scale_x_continuous(breaks = seq(0,28,2)) +
theme_minimal(base_size = 16) +
theme(panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank()) +
xlab('Minimum number of valid days') +
ylab('Percentage of subjects that meet data quality criteria') +
labs(color = "%wear threshold \n for valid day")
ggsave('plots/combined_compliance_plot.png', height = 6, width = 16)
Even though Figure 4 (left) used all days (including ones after the 3rd week) in the calculation of valid days compared to only the first 23 days of the protocol (right), the drop in percentage of subjects that met the each %wear threshold is almost unnoticeable.
ci <- function(x) {
(sd(x, na.rm = T)/sqrt(length(na.omit(x)))) * 1.96
}
my_ntiles <- function (x, n = 3, digits = 3)
{
x <- as.numeric(x)
xrank <- rank(x, na.last = TRUE, ties.method = "first")
xrank[is.na(x)] <- NA
size <- max(xrank, na.rm = TRUE)
if(size[[1]] < n + 1) {res <- rep(NA, length(x))}
else{
cts <- round(seq(1, size, length.out = (n + 1)))
bin <- as.numeric(cut(xrank, breaks = cts, include.lowest = TRUE))
left <- mosaic::min(x ~ bin, na.rm = TRUE)
right <- mosaic::max(x ~ bin, na.rm = TRUE)
res <- factor(bin, labels = paste0("[", signif(left, digits = digits), " to ", signif(right, digits = digits), "]", " Q", mosaic::max(bin ~ bin, na.rm = T)), ordered = TRUE)
}
return(res)
}
#create mean curve
meancurve <- act10filtered %>%
group_by(minute) %>%
summarise(ENMO_m = mean(ENMO, na.rm = T),
ci = ci(ENMO)) %>%
ungroup() %>%
mutate(enmo_smooth = roll_mean(ENMO_m, 5, na.rm = T, align = 'center', fill = NA),
ci_smooth = roll_mean(ci, 5, na.rm = T, align = 'center', fill = NA))
ggplot(meancurve, aes(x = minute, y = enmo_smooth)) +
geom_ribbon(aes(ymin = enmo_smooth - ci_smooth, ymax = enmo_smooth + ci_smooth), fill = 'grey65') +
geom_line() +
scale_x_continuous(breaks = seq(0, 1440, 240), labels = c('12am', '4am', '8am', '12pm', '4pm', '8pm', '12am')) +
#scale_color_manual(values = cm.colors(1)) +
#scale_color_manual(values = rainbow(7)) +
theme_bw(base_size = 14) +
theme(axis.title.x = element_blank()) +
labs(y = 'ENMO (mg)') +
ggtitle('Mean 24hr activity trajectory')
ggsave('plots/24hrTraj.png', width = 7, height = 4)
Figure 5A Smoothed trajectories of 10 minute resolution ENMO data for the A) mean of the sample.
#Add phenotypic variables
#rename
#HBN_coversheet <- HBN_coversheet %>%
# rename(ID = Rand_ID,
## EID = GUID) %>%
# mutate(ID = as.character(ID)) %>%
# select(-Study_Site)
#SOMETHING IS UP WITH THE JOIN, LETS FIX IT TOMORROW
HBN_demo <- HBN_demo %>% rename(ID = EID)
#fix study ID and join to demographics
act10filtered <- act10filtered %>%
left_join(HBN_demo, by = 'ID')
#join to internet variable
HBN_IAT <- HBN_IAT %>%
select(EID, IAT_Total) %>%
rename(ID = EID)
act10filtered <- act10filtered %>% left_join(HBN_IAT, by = 'ID')
#fix IDs
#select sleep variables and rename
sleepvars <- part7_daysummary %>% select(newID, sleeponset, SleepDurationInSpt, wakeup, sleep_efficiency)
sleepvars <- sleepvars %>% rename(sleep_onset = sleeponset,
sleep_duration = SleepDurationInSpt,
sleep_wake = wakeup,
ID = newID)
#filter, onset after 6pm and wake after 3am
sleepvars <- sleepvars %>% filter(sleep_onset > 18 & sleep_wake > 27)
#create midpoint
sleepvars <- sleepvars %>% mutate(midpoint = (sleep_wake + sleep_onset)/2 - 24)
#ggplot(sleepvars, aes(x = midpoint)) + geom_histogram()
#summarize to subject level
sleepvars <- sleepvars %>%
group_by(ID) %>%
summarize_all(~mean(., na.rm=T))
sleepvars <- sleepvars %>% mutate(sleep_duration_q3 = my_ntiles(sleep_duration/60, 3, 3),
midpoint_q3 = my_ntiles(midpoint, 3, 3),
sleep_efficiency_q3 = my_ntiles(sleep_efficiency, 3, 3),
sleep_duration_q4 = my_ntiles(sleep_duration/60, 4, 3),
midpoint_q4 = my_ntiles(midpoint, 4, 3),
sleep_efficiency_q4 = my_ntiles(sleep_efficiency, 4, 3))
#join sleep variables
act10filtered <- act10filtered %>% left_join(sleepvars, by = 'ID')
#recode sex, study site
act10filtered <- act10filtered %>%
mutate(Sex = recode(Sex, `0` = 'male', `1` = 'female'),
Study_Site = recode(Study_Site, `1` = 'Staten Island', `3` = 'Midtown'))
#create quanitles for age and IAT scores
act10filtered <- act10filtered %>%
mutate(age_q3 = my_ntiles(Age, 3, 2),
age_q4 = my_ntiles(Age, 4, 2),
IAT_q3 = my_ntiles(IAT_Total, 3, 2),
IAT_q4 = my_ntiles(IAT_Total, 4, 2))
act10filtered <- act10filtered %>%
mutate(weekday = factor(weekdays(timestamp), levels = c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')))
plotcurves <- function(dataset, variable, numrows) {
meancurve <- dataset %>%
group_by(minute) %>%
summarise(mean_enmo = mean(ENMO, na.rm = T),
mean_enmo_ci = ci(ENMO)) %>%
ungroup() %>%
mutate(enmo_smooth = roll_mean(mean_enmo, 5, na.rm = T, align = 'center', fill = NA),
ci_smooth = roll_mean(mean_enmo_ci, 5, na.rm = T, align = 'center', fill = NA))
weekPlotData_diff <- dataset %>%
left_join(meancurve, by = 'minute') %>%
mutate(diff_enmo = ENMO - mean_enmo) %>%
filter(!is.na(!!sym(variable))) %>%
group_by_(variable, 'minute') %>%
summarise(enmo_p = mean(mean_enmo, na.rm = T),
ci_p = mean(mean_enmo_ci, na.rm = T),
enmo_m = mean(ENMO, na.rm = T),
enmo_ci = ci(ENMO),
diff_enmo_m = mean(diff_enmo, na.rm = T),
diff_enmo_ci = ci(diff_enmo)) %>%
ungroup() %>%
group_by_(variable) %>%
mutate(enmo_p_smooth = roll_mean(enmo_p, 5, na.rm=TRUE, align="center", fill = NA),
ci_p_smooth = roll_mean(ci_p, 5, na.rm=TRUE, align="center", fill = NA),
enmo_smooth = roll_mean(enmo_m, 5, na.rm=TRUE, align="center", fill = NA),
enmo_ci_smooth = roll_mean(enmo_ci, 5, na.rm=TRUE, align="center", fill = NA),
diff_enmo_smooth = roll_mean(diff_enmo_m, 5, na.rm=TRUE, align="center", fill = NA),
diff_enmo_ci_smooth = roll_mean(diff_enmo_ci, 5, na.rm=TRUE, align="center", fill = NA),
lower_bound = diff_enmo_smooth - diff_enmo_ci_smooth,
upper_bound = diff_enmo_smooth + diff_enmo_ci_smooth,
shade_dummy = ifelse(lower_bound > 0, 3,
ifelse(upper_bound < 0, 1, 0)),
shade_diff = diff(c(0, 0, 0, shade_dummy[3:142], 0, 0, 0))[1:144],
plusminus = ifelse(shade_diff %in% c(1, -1), 'minus',
ifelse(shade_diff %in% c(3, -3), 'plus', NA)))
xmin <- weekPlotData_diff %>%
select(variable, 'minute', 'shade_diff', 'plusminus') %>%
filter(shade_diff == 1 | shade_diff == 3)
xmax <- weekPlotData_diff %>%
select(variable, 'minute', 'shade_diff', 'plusminus') %>%
filter(shade_diff == -1 | shade_diff == -3)
weekRect <- tibble(!!variable := xmin[[variable]],
min = xmin[['minute']],
max = xmax[['minute']] - 1,
plusminus = xmin[['plusminus']])
#plot 1, smooth means by group################################################################
weekdaysPlot <- ggplot(weekPlotData_diff, aes_string(x = 'minute', y = 'enmo_smooth')) +
geom_line(aes_string(color = variable)) +
geom_ribbon(aes(ymin = enmo_smooth - enmo_ci_smooth, ymax = enmo_smooth + enmo_ci_smooth, fill = !!sym(variable)), alpha = 0.1) +
scale_x_continuous(breaks = seq(0, 1440, 240), labels = c('12am', '4am', '8am', '12pm', '4pm', '8pm', '12am')) +
scale_color_viridis_d() +
scale_fill_viridis_d() +
#scale_color_manual(values = rainbow(7)) +
theme_bw(base_size = 14) +
theme(axis.title.x = element_blank()) +
labs(y = 'ENMO (mg)') +
ggtitle(paste0('24hr activity trajectories over ', variable))
#plot 2 smooth differences by group##########################################################################
weekdaysDiff <- ggplot(weekPlotData_diff, aes_string(x = 'minute', y = 'diff_enmo_smooth')) +
geom_line(aes_string(color = variable)) +
geom_ribbon(aes(ymin = diff_enmo_smooth - diff_enmo_ci_smooth,
ymax = diff_enmo_smooth + diff_enmo_ci_smooth,
fill = !!sym(variable)), alpha = 0.1) +
scale_x_continuous(breaks = seq(0, 1440, 240), labels = c('12am', '4am', '8am', '12pm', '4pm', '8pm', '12am')) +
scale_color_viridis_d() +
scale_fill_viridis_d() +
#scale_color_manual(values = rainbow(7)) +
theme_bw(base_size = 14) +
theme(axis.title.x = element_blank()) +
labs(y = 'ENMO (mg)') +
ggtitle(paste0('24hr ', variable, ' trajectory - mean trajectory'))
#plot 3 smooth differences by group faceted##########################################################################
weekdaysDiff_f <- ggplot() +
geom_line(data = weekPlotData_diff, aes(x = minute, y = diff_enmo_smooth), color = 'black') +
geom_ribbon(data = weekPlotData_diff, aes(x = minute, ymin = diff_enmo_smooth - diff_enmo_ci_smooth,
ymax = diff_enmo_smooth + diff_enmo_ci_smooth), alpha = 0, linetype = 2, color = 'red') +
geom_hline(data = weekRect, color = 'blue', aes(yintercept=0), linetype = 2) +
geom_rect(data = weekRect, aes(xmin = min, xmax = max, ymin = -Inf, ymax = Inf, fill = plusminus), alpha = 0.2) +
scale_fill_manual(values = c('magenta', 'cyan')) +
scale_x_continuous(breaks = seq(0, 1440, 240), labels = c('12am', '4am', '8am', '12pm', '4pm', '8pm', '12am')) +
theme_bw(base_size = 10) +
theme(axis.title.x = element_blank(),
panel.grid = element_blank(),
legend.position = 'none') +
labs(y = 'ENMO (mg)') +
ggtitle(paste0('24hr ', variable, ' trajectory - mean trajectory')) +
facet_wrap(as.formula(paste("~", variable)), nrow = numrows)
#plot 4
weekdaysDiff2_f <- ggplot() +
geom_line(data = weekPlotData_diff, aes(x = minute, y = enmo_smooth), color = 'black') +
#geom_ribbon(data = weekPlotData_diff, aes(x = minute, ymin = enmo_smooth - enmo_ci_smooth,
#ymax = enmo_smooth + enmo_ci_smooth), alpha = 0, linetype = 2, color = 'red') +
geom_line(data = weekPlotData_diff, color = 'blue', aes(x = minute, y = enmo_p_smooth), linetype = 2) +
#geom_ribbon(data = weekPlotData_diff, aes(x = minute, ymin = enmo_p_smooth - ci_p_smooth,
#ymax = enmo_p_smooth + ci_p_smooth), alpha = 0.5, fill = 'blue') +
geom_rect(data = weekRect, aes(xmin = min, xmax = max, ymin = -Inf, ymax = Inf, fill = plusminus), alpha = 0.2) +
scale_fill_manual(values = c('magenta', 'cyan')) +
scale_x_continuous(breaks = seq(0, 1440, 240), labels = c('12am', '4am', '8am', '12pm', '4pm', '8pm', '12am')) +
theme_bw(base_size = 10) +
theme(axis.title.x = element_blank(),
panel.grid = element_blank(),
legend.position = 'none') +
labs(y = 'ENMO (mg)') +
ggtitle(paste0('24hr ', variable, ' trajectory - mean trajectory')) +
facet_wrap(as.formula(paste("~", variable)), nrow = numrows)
assign('p1', weekdaysPlot, envir = .GlobalEnv)
assign('p2', weekdaysDiff, envir = .GlobalEnv)
assign('p3', weekdaysDiff_f, envir = .GlobalEnv)
assign('p4', weekdaysDiff2_f, envir = .GlobalEnv)
}
#day of week
plotcurves(act10filtered, 'weekday', 2)
ggsave(p1, file = 'plots/24hrTrajWeekday1.png', width = 8, height = 4, dpi = 150)
ggsave(p2, file = 'plots/24hrTrajWeekday2.png', width = 8, height = 4, dpi = 150)
ggsave(p3, file = 'plots/24hrTrajWeekday3.png', width = 12, height = 4, dpi = 150)
ggsave(p4, file = 'plots/24hrTrajWeekday4.png', width = 12, height = 4, dpi = 150)
#Sex
plotcurves(act10filtered, 'Sex', 1)
ggsave(p1, file = 'plots/24hrTrajSex1.png', width = 8, height = 4, dpi = 150)
ggsave(p2, file = 'plots/24hrTrajSex2.png', width = 8, height = 4, dpi = 150)
ggsave(p3, file = 'plots/24hrTrajSex3.png', width = 6, height = 2, dpi = 150)
ggsave(p4, file = 'plots/24hrTrajSex4.png', width = 6, height = 2, dpi = 150)
#age
plotcurves(act10filtered, 'age_q4', 2)
ggsave(p1, file = 'plots/24hrTrajAge1.png', width = 8, height = 4, dpi = 150)
ggsave(p2, file = 'plots/24hrTrajAge2.png', width = 8, height = 4, dpi = 150)
ggsave(p3, file = 'plots/24hrTrajAge3.png', width = 6, height = 4, dpi = 150)
ggsave(p4, file = 'plots/24hrTrajAge4.png', width = 6, height = 4, dpi = 150)
#sleep duration
plotcurves(act10filtered, 'sleep_duration_q4', 2)
ggsave(p1, file = 'plots/24hrTrajSleepDur1.png', width = 8, height = 4, dpi = 150)
ggsave(p2, file = 'plots/24hrTrajSleepDur2.png', width = 8, height = 4, dpi = 150)
ggsave(p3, file = 'plots/24hrTrajSleepDur3.png', width = 6, height = 4, dpi = 150)
ggsave(p4, file = 'plots/24hrTrajSleepDur4.png', width = 6, height = 4, dpi = 150)
Figure 5C Smoothed trajectories of 10 minute resolution ENMO data C) across the days of week.
Figure 5B Smoothed trajectories of 10 minute resolution ENMO data B) across sex.
Figure 5D Smoothed trajectories of 10 minute resolution ENMO data D) across age groups.
create_2week_datasets_ICC <- function(data, day_length) {
#this is a 'full' dataset, drop the empty days
act10 <- drop_na(data)
length(unique(act10$ID))
##FILTER TO ONLY SUBJECTS WITH 4 WEEKS (this is temporary, comment out)
#act10 <- act10 %>% filter(ID %in% ID_4weeks)
#check if timestamp is character, if it is turn it into POSIX
if (is.character(act10$timestamp)) {act10$timestamp <- as.POSIXct(act10$timestamp)}
#### For now get rid of incomplete sundays from daylight savings
dls_days <- act10 %>%
group_by(dayIndex) %>%
summarise(daylength = length(dayIndex)) %>%
filter(daylength == day_length)
act10 <- act10 %>% filter(dayIndex %in% dls_days$dayIndex)
ndays <- act10 %>%
group_by(ID) %>%
summarise(n = length(unique(day)))
sub2weeks <- ndays %>% filter(n > 14)
length(unique(sub2weeks$ID))
act10_2weeks <- act10 %>% filter(ID %in% sub2weeks$ID)
act10_2weeks <- act10_2weeks %>% mutate(
weekday = weekdays(timestamp),
weekday_i = paste0(weekday, " ", day))
weekday_check <- act10_2weeks %>%
group_by(ID) %>%
dplyr::summarize(Mon = length(grep("Mon", weekday)) / day_length,
Tue = length(grep("Tue", weekday)) / day_length,
Wed = length(grep("Wed", weekday)) / day_length,
Thu = length(grep("Thu", weekday)) / day_length,
Fri = length(grep("Fri", weekday)) / day_length,
Sat = length(grep("Sat", weekday)) / day_length,
Sun = length(grep("Sun", weekday)) / day_length)
#have more than 14 days of data but incomplete weeks, like 3 mondays 1 sunday etc
#subset to people with >14 days, and at least 2 complete weeks
#order a matrix by monday-sunday1 monday-sunday2
weekday_check <- weekday_check %>%
rowwise() %>%
mutate(bad_sub = ifelse(Mon < 2 | Tue < 2 | Wed < 2 | Thu < 2 | Fri < 2 | Sat < 2 | Sun < 2, 1, 0)) %>%
ungroup()
good_sub <- weekday_check %>% filter(bad_sub == 0)
act10_2weeks <- act10_2weeks %>% filter(ID %in% good_sub$ID)
length(unique(act10_2weeks$ID))
act10_2weeks$weekday = dplyr::recode(act10_2weeks$weekday,
Friday = "Fri",
Saturday = "Sat",
Sunday = "Sun",
Monday = "Mon",
Tuesday = "Tue",
Wednesday = "Wed",
Thursday = "Thu")
#reformoat data in week format
subjects <- unique(act10_2weeks$ID)
for (i in 1:length(subjects)) {
sub <- subjects[[i]]
subdata <- act10_2weeks %>% filter(ID == sub)
for (wd in c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")) {
allweekday <- subdata %>% filter(weekday == wd)
weekdaychar <- unique(allweekday$weekday_i)
week1 <- allweekday %>% filter(weekday_i == weekdaychar[[1]])
week2 <- allweekday %>% filter(weekday_i == weekdaychar[[2]])
if (wd == "Mon") {
fullweek1 <- week1
fullweek2 <- week2
}
else {
fullweek1 <- rbind(fullweek1, week1)
fullweek2 <- rbind(fullweek2, week2)
}
}
#append data
if (i == 1) {
act10_week1 <- fullweek1
act10_week2 <- fullweek2
}
else {
act10_week1 <- rbind(act10_week1, fullweek1)
act10_week2 <- rbind(act10_week2, fullweek2)
}
message(paste0(subjects[[i]], " is finished ..."))
}
act10_week1 <- act10_week1 %>%
rename(ENMO_week1 = ENMO)
act10_week2 <- act10_week2 %>%
rename(ENMO_week2 = ENMO)
act10_2weeks <- act10_week1 %>%
select(ID, weekday, minute, ENMO_week1) %>%
mutate(ENMO_week2 = act10_week2$ENMO_week2)
act10_2weeks <<- act10_2weeks %>%
mutate(weekday = factor(weekday, ordered = TRUE,
levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"))) %>% ungroup()
#act10_week1_wide <<- act10_week1 %>%
# select(ID, weekday, minute, ENMO) %>%
# spread(key = ID, value = ENMO)
#act10_week2_wide <<- act10_week2 %>%
# select(ID, weekday, minute, ENMO) %>%
# spread(key = ID, value = ENMO)
}
ICC_3methods_WE_WD <- function(weekdata, resolution, timeofday = F) {
message('generating datasets for Method 1 ...')
weekdata <- weekdata %>% gather(ENMO_week1, ENMO_week2, key = week, value = ENMO)
#Weekday (Tues/Wed)
weekdata_WD <- weekdata %>% filter(weekday %in% c('Tue', 'Wed'))
#Weekends (Sat/Sun)
weekdata_WE <- weekdata %>% filter(weekday %in% c('Sat', 'Sun'))
#Weekdays (Mon thru Fri)
weekdata_WD5 <- weekdata %>% filter(weekday %in% c('Mon', 'Tue', 'Wed', 'Thu', 'Fri'))
#Method2
message('generating datasets for Method 2 ...')
#Full Week
weekdata_mean <- weekdata %>%
group_by(ID, week, minute) %>%
summarise(ENMO = mean(ENMO)) %>%
ungroup()
#Weekday (Tues/Wed)
weekdata_mean_WD <- weekdata_WD %>%
group_by(ID, week, minute) %>%
summarise(ENMO = mean(ENMO)) %>%
ungroup()
#Weekends (Sat/Sun)
weekdata_mean_WE <- weekdata_WE %>%
group_by(ID, week, minute) %>%
summarise(ENMO = mean(ENMO)) %>%
ungroup()
#Weekdays (5 days)
weekdata_mean_WD5 <- weekdata_WD5 %>%
group_by(ID, week, minute) %>%
summarise(ENMO = mean(ENMO)) %>%
ungroup()
#Method3
message('generating datasets for Method 3 ...')
#Full Week
weekdata_daily <- weekdata %>%
mutate(ID_day = paste0(ID, "_", weekday)) %>%
group_by(ID_day, minute) %>%
summarise(ID = ID[[1]],
weekday = weekday[[1]],
ENMO = mean(ENMO)) %>%
ungroup() %>%
select(-ID_day)
#Weekday (Tues/Wed)
weekdata_daily_WD <- weekdata_WD %>%
mutate(ID_day = paste0(ID, "_", weekday)) %>%
group_by(ID_day, minute) %>%
summarise(ID = ID[[1]],
weekday = weekday[[1]],
ENMO = mean(ENMO)) %>%
ungroup() %>%
select(-ID_day)
#Weekends (Sat/Sun)
weekdata_daily_WE <- weekdata_WE %>%
mutate(ID_day = paste0(ID, "_", weekday)) %>%
group_by(ID_day, minute) %>%
summarise(ID = ID[[1]],
weekday = weekday[[1]],
ENMO = mean(ENMO)) %>%
ungroup() %>%
select(-ID_day)
#Weekday (5 days)
weekdata_daily_WD5 <- weekdata_WD5 %>%
mutate(ID_day = paste0(ID, "_", weekday)) %>%
group_by(ID_day, minute) %>%
summarise(ID = ID[[1]],
weekday = weekday[[1]],
ENMO = mean(ENMO)) %>%
ungroup() %>%
select(-ID_day)
###Calculate ICC
message('calculating ICC scores')
#Method1
if (timeofday) {
#Full week
#vars <- get_variance(fit)
#how to break down ICC
#vars$var.random/(vars$var.random + vars$var.residual)
message('running models for Method1 full week')
m1_1 <- weekdata %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'All days',
resolution = resolution)
#Weekday
message('running models for Method1 weekday')
m1_2 <- weekdata_WD %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running models for Method1 weekend')
m1_3 <- weekdata_WE %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Sat/Sun',
resolution = resolution)
#Weekday (5 days)
message('running models for Method1 weekday (5 days)')
m1_4 <- weekdata_WD5 %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Weekdays',
resolution = resolution)
#Method2
#Full week
message('running model for Method2 full week')
m2_1 <- weekdata_mean %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'All days',
resolution = resolution)
#Weekday
message('running model for Method2 weekday')
m2_2 <- weekdata_mean_WD %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running model for Method2 weekend')
m2_3 <- weekdata_mean_WE %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Sat/Sun',
resolution = resolution)
message('running model for Method2 weekday (5 days)')
m2_4 <- weekdata_mean_WD5 %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Weekdays',
resolution = resolution)
#Method3
#Full week
message('running model for Method3 full week')
m3_1 <- weekdata_daily %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'All days',
resolution = resolution)
#Weekday
message('running model for Method3 weekday')
m3_2 <- weekdata_daily_WD %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running model for Method3 weekend')
m3_3 <- weekdata_daily_WE %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Sat/Sun',
resolution = resolution)
message('running model for Method3 weekday (5days)')
m3_4 <- weekdata_daily_WD5 %>%
group_by(minute) %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Weekdays',
resolution = resolution)
return(bind_rows(m1_1, m1_2, m1_3, m1_4, m2_1, m2_2, m2_3, m2_4, m3_1, m3_2, m3_3, m3_4))
}
else {
#Full week
message('running models for Method1 full week')
m1_1 <- weekdata %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'All days',
resolution = resolution)
#Weekday
message('running models for Method1 weekday')
m1_2 <- weekdata_WD %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running models for Method1 weekend')
m1_3 <- weekdata_WE %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Sat/Sun',
resolution = resolution)
#Weekday (5days)
message('running models for Method1 weekday (5days)')
m1_4 <- weekdata_WD5 %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Weekdays',
resolution = resolution)
#Method2
#Full week
message('running model for Method2 full week')
m2_1 <- weekdata_mean %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'All days',
resolution = resolution)
#Weekday
message('running model for Method2 weekday')
m2_2 <- weekdata_mean_WD %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running model for Method2 weekend')
m2_3 <- weekdata_mean_WE %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Sat/Sun',
resolution = resolution)
#Weekday (5days)
message('running model for Method2 weekday (5days)')
m2_4 <- weekdata_mean_WD5 %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Weekdays',
resolution = resolution)
#Method3
#Full week
message('running model for Method3 full week')
m3_1 <- weekdata_daily %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'All days',
resolution = resolution)
#Weekday
message('running model for Method3 weekday')
m3_2 <- weekdata_daily_WD %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running model for Method3 weekend')
m3_3 <- weekdata_daily_WE %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Sat/Sun',
resolution = resolution)
#Weekday (5days)
message('running model for Method3 weekday (5days)')
m3_4 <- weekdata_daily_WD5 %>%
do(fit = lmer(ENMO ~ 1 + (1 | ID), data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Weekdays',
resolution = resolution)
return(bind_rows(m1_1, m1_2, m1_3, m1_4, m2_1, m2_2, m2_3, m2_4, m3_1, m3_2, m3_3, m3_4))
}
}
downsample <- function(dataset, epoch, method) {
print(paste0('Downsampling 60s epoch data into ', epoch, ' minute epochs.'))
if (method == 'mean') {
dataset <- dataset %>%
mutate(
minute = minute - 1,
minute = floor(minute/epoch)) %>%
group_by(ID, day, minute) %>%
summarise(timestamp = timestamp[1],
ENMO = mean(ENMO, na.rm = T)
#wear = min(wear)
) %>%
mutate(minute = (minute + 1) * epoch) %>%
ungroup()
return(dataset)
}
if (method == 'median') {
dataset <- dataset %>%
mutate(
minute = minute - 1,
minute = floor(minute/epoch)) %>%
group_by(ID, day, minute) %>%
summarise(timestamp = timestamp[1],
ENMO = median(ENMO, na.rm = T)
#wear = min(wear)
) %>%
mutate(minute = (minute + 1) * epoch) %>%
ungroup()
return(dataset)
}
if (method == 'multiple') {
datasetl <- list()
#SD
datasetl[[1]] <- dataset %>%
mutate(
minute = minute - 1,
minute = floor(minute/epoch)) %>%
group_by(ID, day, minute) %>%
summarise(timestamp = timestamp[1],
ENMO = sd(ENMO, na.rm = T)
#wear = min(wear)
) %>%
mutate(minute = (minute + 1) * epoch) %>%
ungroup()
#IQR
datasetl[[2]] <- dataset %>%
mutate(
minute = minute - 1,
minute = floor(minute/epoch)) %>%
group_by(ID, day, minute) %>%
summarise(timestamp = timestamp[1],
ENMO = IQR(ENMO, na.rm = T)
#wear = min(wear)
) %>%
mutate(minute = (minute + 1) * epoch) %>%
ungroup()
#Skewness
datasetl[[3]] <- dataset %>%
mutate(
minute = minute - 1,
minute = floor(minute/epoch)) %>%
group_by(ID, day, minute) %>%
summarise(timestamp = timestamp[1],
ENMO = myskewness(ENMO)
#wear = min(wear)
) %>%
mutate(minute = (minute + 1) * epoch) %>%
ungroup()
#Kurtosis
datasetl[[4]] <- dataset %>%
mutate(
minute = minute - 1,
minute = floor(minute/epoch)) %>%
group_by(ID, day, minute) %>%
summarise(timestamp = timestamp[1],
ENMO = mykurtosis(ENMO)
#wear = min(wear)
) %>%
mutate(minute = (minute + 1) * epoch) %>%
ungroup()
return(datasetl)
}
}
#results_mean <- list()
results_mean_tod <- list()
two_week_datasets <- list()
#loop through all times of days
resolution <- seq(0, 1440, 30)
resolution[[1]] <- 10
n_chunks <- 1440/resolution
n_chunks <- n_chunks[!n_chunks%%1]
resolution <- 1440/n_chunks
#load in 60s data
#act60s <- read_csv('datasets/HBN_actigraphy_60s_95wear_3day.csv')
#695 subjects
###)log transformed data - - - - - - - - - - - - - - --
act60sLog <- act60s %>% mutate(ENMO = log(9250 * ENMO + 1))
write_csv(act60sLog, 'datasets_log/HBN_actigraphy_60s_95wear_3day.csv')
ggplot(act60s, aes(x = ENMO)) + geom_density()
ggsave('plots/60distr.png', width = 8, height = 4)
ggplot(act60s, aes(x = log(ENMO + 1))) + geom_density()
ggsave('plots/60distr_log.png', width = 8, height = 4)
ggplot(act60sLog, aes(x = ENMO)) + geom_density()
ggsave('plots/60distr_log9250.png', width = 8, height = 4)
For 60s data, we use this log transform before further analysis.
results_mean_tod_log <- list()
two_week_datasets_log <- list()
#create datasets
for (i in 1:11) {
actPlotData <- downsample(act60sLog, resolution[i], method = 'mean')
actPlotData <- actPlotData %>%
mutate(dayIndex = paste0(ID, '-', day))
create_2week_datasets_ICC(actPlotData, n_chunks[i])
two_week_datasets_log[[i]] <- act10_2weeks
}
#save datasets
save(two_week_datasets_log, file = 'datasets_log/2week_ICC/two_week_datasets_log.RData')
#load('datasets/2week_ICC/two_week_datasets.RData')
#run ICC
for (i in 1:11) {
results_mean_tod_log[[i]] <- ICC_3methods_WE_WD(two_week_datasets_log[[i]], resolution[i], timeofday = T)
}
ICC_all_res_mean_tod_log <- do.call(rbind, results_mean_tod_log)
#save results
write_csv(ICC_all_res_mean_tod_log, file = 'datasets_log/2week_ICC/two_week_results_log.csv')
#OHBM Abstract plots----------------------------------------
#ICC_all_res_mean_tod <- read_csv('datasets_log/2week_ICC/two_week_results.csv')
#Panel C (10min to 1440min resolution mean ICC)
ICC_week_mean <- ICC_all_res_mean_tod_log %>%
filter(method == 'week_mean')
#Panel B (10 min, 30min, 60min 24hr ICC)
#ICC_week_mean <-
# ICC_week_mean %>% gather(ICC:var_within, key = result, value = value)
#ICC_week_mean <-
# ICC_week_mean %>% spread(key = result, value = value)
ggplot(data = ICC_week_mean, aes(x = resolution, y = ICC, color = type)) +
stat_summary(fun = mean, geom="point",
position=position_dodge(width=0.75), size = 1) +
stat_summary(fun.data = mean_se, geom="errorbar",
position=position_dodge(width=0.75), size = 0.25) +
stat_summary(fun = mean, geom="line", linetype = 2) +
scale_y_continuous(breaks = seq(0.1, 1, 0.1), limits = c(0,1)) +
scale_x_continuous(breaks = c(10, 60, 120, 180, 240, 360, 480, 720, 1440)) +
theme_minimal(base_size = 14) +
theme(panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
axis.text.x = element_text(angle = 45),
legend.title = element_blank()) +
ggtitle('Mean ICC of L-ENMO across different epoch resolutions') +
xlab('Epoch resolution in minutes')
ggsave(filename = 'plots/fig6_D.png', height = 4, width = 8)
Figure 6D: ICC of ENMO data from 10 minute to 1440 minute epoch resolution.
#turn milliG into G
ICC_week_mean <- ICC_week_mean %>%
mutate(var_between = var_between * 1000,
var_within = var_within * 1000)
ICC_week_mean <- ICC_week_mean %>%
mutate(type = factor(type, levels = c("All days", "Weekdays", "Tue/Wed", "Sat/Sun")))
coeff = 2000/0.8
#show_col(hue_pal()(8))
#"#F8766D" "#7CAE00" "#00BFC4" "#C77CFF"
colors <- c("ICC" = '#FF61CC', "var_between" = '#00BE67', "var_within" = '#00BFC4')
ggplot(data = filter(ICC_week_mean, resolution == 10), aes(x = minute)) +
geom_smooth(aes(y = ICC, color = 'ICC'), span = 0.25, alpha = 0, size = 0.75) +
geom_smooth(aes(y = var_between/coeff, color = 'var_between'), span = 0.05, alpha = 0, linetype = 2, size = 0.75) +
geom_smooth(aes(y = var_within/coeff, color = 'var_within'), span = 0.05, alpha = 0, linetype = 2, size = 0.75) +
#geom_smooth(aes(y = ICC), method = "gam", formula = y ~ s(x, bs = "cs")) +
#geom_smooth(aes(y = var_between/coeff), method = "gam", formula = y ~ s(x, bs = "cs")) +
#geom_smooth(aes(y = var_within/coeff), method = "gam", formula = y ~ s(x, bs = "cs")) +
scale_x_continuous(breaks = seq(0, 1440, 240), labels = c('12am', '4am', '8am', '12pm', '4pm', '8pm', '12am')) +
scale_y_continuous(
#breaks = seq(0.2, 1, 0.1),
limits = c(0, 0.8),
name = "ICC",
sec.axis = sec_axis( trans=~.*coeff, name="L-ENMO")
) +
theme_minimal(base_size = 12) +
theme(panel.grid.minor.x = element_blank(),
legend.title = element_blank()) +
facet_wrap(~type, nrow = 3) +
labs(x = '',
color = 'Legend') +
scale_color_manual(values = colors) +
ggtitle('ICC of 10min epoch data over time of day')
ggsave(filename = 'plots/fig6_C_with_var_smooth.png', height = 5 , width = 9)
Figure 6C ICC data of 1 min resolution ENMO data in 10 minute bins across the 24 hour time period. The random effect variance (between subject) and residual variance (within subject) are overlaid in dashed lines.
#no smooth
ggplot(data = filter(ICC_week_mean, resolution == 10), aes(x = minute)) +
geom_line(aes(y = ICC, color = 'ICC')) +
#geom_point(aes(y = ICC, color = 'ICC')) +
geom_line(aes(y = var_between/coeff, color = 'var_between'), linetype = 2) +
#geom_point(aes(y = var_between/coeff, color = 'var_between')) +
geom_line(aes(y = var_within/coeff, color = 'var_within'), linetype = 2) +
#geom_point(aes(y = var_within/coeff, color = 'var_within')) +
#geom_smooth(aes(y = ICC), method = "gam", formula = y ~ s(x, bs = "cs")) +
#geom_smooth(aes(y = var_between/coeff), method = "gam", formula = y ~ s(x, bs = "cs")) +
#geom_smooth(aes(y = var_within/coeff), method = "gam", formula = y ~ s(x, bs = "cs")) +
scale_x_continuous(breaks = seq(0, 1440, 240), labels = c('12am', '4am', '8am', '12pm', '4pm', '8pm', '12am')) +
scale_y_continuous(
breaks = seq(0, 1, 0.1),
#limits = c(0, 0.8),
name = "ICC",
sec.axis = sec_axis( trans=~.*coeff, name="L-ENMO")) +
theme_minimal(base_size = 12) +
theme(panel.grid.minor.x = element_blank(),
legend.title = element_blank()) +
facet_wrap(~type, nrow = 3) +
labs(x = '',
color = 'Legend') +
scale_color_manual(values = colors) +
ggtitle('ICC of 10min epoch data over time of day')
ggsave(filename = 'plots/fig6_C_with_var_nosmooth.png', height = 5 , width = 9)
ICC were calculated for key features from postGGIR Part7 day level summaries:
| Variable name | Description |
|---|---|
| TAC | Total volume of physical activity |
| TLAC | Total volume of log-transformed physical activity |
| SleepDurationInSpt | Total duration within sleep period which the subject was asleep |
| sleep_Midpoint | Time point halfway between sleep onset and wake time |
| sleep_efficiency | Total duration asleep in sleep interval / total duration spent in sleep period |
| RA | Relative amplitude: (M10-L5)/(M10+L5) where M10= most active 10 hrs, L5 = least active 5 hour |
| sed_dur | Total daytime duration in minutes of sedentary activity (<30mg) |
| light_dur | Total daytime duration in minutes of light activity (30-100mg) |
| mod_dur | Total daytime duration in minutes of moderate activity (100-400mg) |
| MVPA_dur | Total daytime duration in minutes of moderate to vigorous activity (>100mg) |
| vig_dur | Total daytime duration in minutes of vigorous activity (>400mg) |
| ASTP | active-to-sedentary transition probabilities |
| SATP | sedentary-to-active transition probabilities |
varnames <- c("TAC", "TLAC", "SleepDurationInSpt", "sleep_Midpoint", "sleep_efficiency",
"RA", "sed_dur", "light_dur", "mod_dur", "MVPA_dur", "vig_dur", "ASTP", "SATP")
#add minute column (turn it in actplotdata format), rename window_number into day
####This is a WW dataset, the M5TIME_num variable will need to be transformed, while the L5TIME_num variable makes sense###
part7_daysummary <- part7_daysummary %>%
mutate(weekday = recode(weekday,
`1` = 'Monday',
`2` = 'Tuesday',
`3` = 'Wednesday',
`4` = 'Thursday',
`5` = 'Friday',
`6` = 'Saturday',
`7` = 'Sunday'))
part7select <- part7_daysummary %>%
select(c("newID", "Date", "weekday", all_of(varnames))) %>%
rename(ID = newID) %>%
mutate(minute = 1440,
dayIndex = paste0(ID, "-", Date))
create_2week_datasets_ICC_multi <- function(data, var, day_length) {
#this is a 'full' dataset, drop the empty days
act10 <- drop_na(data)
length(unique(act10$ID))
#select to one variable
act10 <- act10 %>% select(c("ID", "Date", "weekday", "minute", "dayIndex", var))
##FILTER TO ONLY SUBJECTS WITH 4 WEEKS (this is temporary, comment out)
#act10 <- act10 %>% filter(ID %in% ID_4weeks)
#check if timestamp is character, if it is turn it into POSIX
#if (is.character(act10$timestamp)) {act10$timestamp <- as.POSIXct(act10$timestamp)}
#weekday variable is already created
#### For now get rid of incomplete sundays from daylight savings
dls_days <- act10 %>%
group_by(dayIndex) %>%
summarise(daylength = length(dayIndex)) %>%
filter(daylength == day_length)
act10 <- act10 %>% filter(dayIndex %in% dls_days$dayIndex)
ndays <- act10 %>%
group_by(ID) %>%
summarise(n = length(unique(Date)))
sub2weeks <- ndays %>% filter(n > 14)
length(unique(sub2weeks$ID))
act10_2weeks <- act10 %>% filter(ID %in% sub2weeks$ID)
act10_2weeks <- act10_2weeks %>% mutate(
#weekday = weekdays(timestamp),
weekday_i = paste0(weekday, " ", Date))
weekday_check <- act10_2weeks %>%
group_by(ID) %>%
dplyr::summarize(Mon = length(grep("Mon", weekday)) / day_length,
Tue = length(grep("Tue", weekday)) / day_length,
Wed = length(grep("Wed", weekday)) / day_length,
Thu = length(grep("Thu", weekday)) / day_length,
Fri = length(grep("Fri", weekday)) / day_length,
Sat = length(grep("Sat", weekday)) / day_length,
Sun = length(grep("Sun", weekday)) / day_length)
#have more than 14 days of data but incomplete weeks, like 3 mondays 1 sunday etc
#subset to people with >14 days, and at least 2 complete weeks
#order a matrix by monday-sunday1 monday-sunday2
weekday_check <- weekday_check %>%
rowwise() %>%
mutate(bad_sub = ifelse(Mon < 2 | Tue < 2 | Wed < 2 | Thu < 2 | Fri < 2 | Sat < 2 | Sun < 2, 1, 0)) %>%
ungroup()
good_sub <- weekday_check %>% filter(bad_sub == 0)
act10_2weeks <- act10_2weeks %>% filter(ID %in% good_sub$ID)
length(unique(act10_2weeks$ID))
act10_2weeks$weekday = dplyr::recode(act10_2weeks$weekday,
Friday = "Fri",
Saturday = "Sat",
Sunday = "Sun",
Monday = "Mon",
Tuesday = "Tue",
Wednesday = "Wed",
Thursday = "Thu")
#reformoat data in week format
subjects <- unique(act10_2weeks$ID)
for (i in 1:length(subjects)) {
sub <- subjects[[i]]
subdata <- act10_2weeks %>% filter(ID == sub)
for (wd in c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")) {
allweekday <- subdata %>% filter(weekday == wd)
weekdaychar <- unique(allweekday$weekday_i)
week1 <- allweekday %>% filter(weekday_i == weekdaychar[[1]])
week2 <- allweekday %>% filter(weekday_i == weekdaychar[[2]])
if (wd == "Mon") {
fullweek1 <- week1
fullweek2 <- week2
}
else {
fullweek1 <- rbind(fullweek1, week1)
fullweek2 <- rbind(fullweek2, week2)
}
}
#append data
if (i == 1) {
act10_week1 <- fullweek1
act10_week2 <- fullweek2
}
else {
act10_week1 <- rbind(act10_week1, fullweek1)
act10_week2 <- rbind(act10_week2, fullweek2)
}
message(paste0(subjects[[i]], " is finished ..."))
}
#act10_week1 <- act10_week1 %>%
#rename(ENMO_week1 = ENMO)
#act10_week2 <- act10_week2 %>%
#rename(ENMO_week2 = ENMO)
#act10_2weeks <- act10_week1 %>%
#select(ID, weekday, minute, ENMO_week1) %>%
#mutate(ENMO_week2 = act10_week2$ENMO_week2)
#keep dataset in long form
act10_week1 <- act10_week1 %>%
mutate(week = 1)
act10_week2 <- act10_week2 %>%
mutate(week = 2)
act10_2weeks <- rbind(act10_week1, act10_week2)
act10_2weeks <<- act10_2weeks %>%
mutate(weekday = factor(weekday, ordered = TRUE,
levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"))) %>% ungroup()
#act10_week1_wide <<- act10_week1 %>%
# select(ID, weekday, minute, ENMO) %>%
# spread(key = ID, value = ENMO)
#act10_week2_wide <<- act10_week2 %>%
# select(ID, weekday, minute, ENMO) %>%
# spread(key = ID, value = ENMO)
}
function(weekdata, var, resolution, timeofday = F) {
message('generating datasets for Method 1 ...')
#weekdata <- weekdata %>% gather(var_week1, var_week2, key = week, value = var)
#datasets already in long format
print(var)
var = as.symbol(var)
#Weekday (Tues/Wed)
weekdata_WD <- weekdata %>% filter(weekday %in% c('Tue', 'Wed'))
#Weekends (Sat/Sun)
weekdata_WE <- weekdata %>% filter(weekday %in% c('Sat', 'Sun'))
#Weekdays (5days)
weekdata_WD5 <- weekdata %>% filter(weekday %in% c('Mon', 'Tue', 'Wed', 'Thu', 'Fri'))
#Method2
message('generating datasets for Method 2 ...')
#Full Week
weekdata_mean <- weekdata %>%
group_by(ID, week, minute) %>%
dplyr::summarise(!!var := mean(!!var)) %>%
ungroup()
#Weekday (Tues/Wed)
weekdata_mean_WD <- weekdata_WD %>%
group_by(ID, week, minute) %>%
summarise(!!var := mean(!!var)) %>%
ungroup()
#Weekends (Sat/Sun)
weekdata_mean_WE <- weekdata_WE %>%
group_by(ID, week, minute) %>%
summarise(!!var := mean(!!var)) %>%
ungroup()
#Weekdays (5 days)
weekdata_mean_WD5 <- weekdata_WD5 %>%
group_by(ID, week, minute) %>%
summarise(!!var := mean(!!var)) %>%
ungroup()
#Method2
message('generating datasets for Method 2 ...')
#Full Week
weekdata_mean <- weekdata %>%
group_by(ID, week, minute) %>%
summarise(!!var := mean(!!var)) %>%
ungroup()
#Weekday (Tues/Wed)
weekdata_mean_WD <- weekdata_WD %>%
group_by(ID, week, minute) %>%
summarise(!!var := mean(!!var)) %>%
ungroup()
#Weekends (Sat/Sun)
weekdata_mean_WE <- weekdata_WE %>%
group_by(ID, week, minute) %>%
summarise(!!var := mean(!!var)) %>%
ungroup()
#Weekdays (5 days)
weekdata_mean_WD5 <- weekdata_WD5 %>%
group_by(ID, week, minute) %>%
summarise(!!var := mean(!!var)) %>%
ungroup()
#Method3
message('generating datasets for Method 3 ...')
#Full Week
weekdata_daily <- weekdata %>%
mutate(ID_day = paste0(ID, "_", weekday)) %>%
group_by(ID_day, minute) %>%
summarise(ID = ID[[1]],
weekday = weekday[[1]],
!!var := mean(!!var)) %>%
ungroup() %>%
select(-ID_day)
#Weekday (Tues/Wed)
weekdata_daily_WD <- weekdata_WD %>%
mutate(ID_day = paste0(ID, "_", weekday)) %>%
group_by(ID_day, minute) %>%
summarise(ID = ID[[1]],
weekday = weekday[[1]],
!!var := mean(!!var)) %>%
ungroup() %>%
select(-ID_day)
#Weekends (Sat/Sun)
weekdata_daily_WE <- weekdata_WE %>%
mutate(ID_day = paste0(ID, "_", weekday)) %>%
group_by(ID_day, minute) %>%
summarise(ID = ID[[1]],
weekday = weekday[[1]],
!!var := mean(!!var)) %>%
ungroup() %>%
select(-ID_day)
#Weekday (5 days)
weekdata_daily_WD5 <- weekdata_WD5 %>%
mutate(ID_day = paste0(ID, "_", weekday)) %>%
group_by(ID_day, minute) %>%
summarise(ID = ID[[1]],
weekday = weekday[[1]],
!!var := mean(!!var)) %>%
ungroup() %>%
select(-ID_day)
###Calculate ICC
message('calculating ICC scores')
f1 = as.formula(paste0(var, "~ 1 + (1 | ID)"))
#Method1
if (timeofday) {
#Full week
message('running models for Method1 full week')
m1_1 <- weekdata %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'All days',
resolution = resolution)
#Weekday
message('running models for Method1 weekday')
m1_2 <- weekdata_WD %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running models for Method1 weekend')
m1_3 <- weekdata_WE %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Sat/Sun',
resolution = resolution)
#Weekday (5 days)
message('running models for Method1 weekday (5 days)')
m1_4 <- weekdata_WD5 %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Weekdays',
resolution = resolution)
#Method2
#Full week
message('running model for Method2 full week')
m2_1 <- weekdata_mean %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'All days',
resolution = resolution)
#Weekday
message('running model for Method2 weekday')
m2_2 <- weekdata_mean_WD %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running model for Method2 weekend')
m2_3 <- weekdata_mean_WE %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Sat/Sun',
resolution = resolution)
message('running model for Method2 weekday (5 days)')
m2_4 <- weekdata_mean_WD5 %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Weekdays',
resolution = resolution)
#Method3
#Full week
message('running model for Method3 full week')
m3_1 <- weekdata_daily %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'All days',
resolution = resolution)
#Weekday
message('running model for Method3 weekday')
m3_2 <- weekdata_daily_WD %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running model for Method3 weekend')
m3_3 <- weekdata_daily_WE %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Sat/Sun',
resolution = resolution)
message('running model for Method3 weekday (5days)')
m3_4 <- weekdata_daily_WD5 %>%
group_by(minute) %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Weekdays',
resolution = resolution)
result <- bind_rows(m1_1, m1_2, m1_3, m1_4, m2_1, m2_2, m2_3, m2_4, m3_1, m3_2, m3_3, m3_4)
result <- result %>% mutate(variable = as.character(var))
return(result)
}
else {
#Full week
message('running models for Method1 full week')
m1_1 <- weekdata %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'All days',
resolution = resolution)
#Weekday
message('running models for Method1 weekday')
m1_2 <- weekdata_WD %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running models for Method1 weekend')
m1_3 <- weekdata_WE %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Sat/Sun',
resolution = resolution)
#Weekday (5days)
message('running models for Method1 weekday (5days)')
m1_4 <- weekdata_WD5 %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_block',
type = 'Weekdays',
resolution = resolution)
#Method2
#Full week
message('running model for Method2 full week')
m2_1 <- weekdata_mean %>%
do(fit = lmer(f1, data = .)) %>%
#ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'All days',
resolution = resolution)
#Weekday
message('running model for Method2 weekday')
m2_2 <- weekdata_mean_WD %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running model for Method2 weekend')
m2_3 <- weekdata_mean_WE %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Sat/Sun',
resolution = resolution)
#Weekday (5days)
message('running model for Method2 weekday (5days)')
m2_4 <- weekdata_mean_WD5 %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'week_mean',
type = 'Weekdays',
resolution = resolution)
#Method3
#Full week
message('running model for Method3 full week')
m3_1 <- weekdata_daily %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'All days',
resolution = resolution)
#Weekday
message('running model for Method3 weekday')
m3_2 <- weekdata_daily_WD %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Tue/Wed',
resolution = resolution)
#Weekend
message('running model for Method3 weekend')
m3_3 <- weekdata_daily_WE %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Sat/Sun',
resolution = resolution)
#Weekday (5days)
message('running model for Method3 weekday (5days)')
m3_4 <- weekdata_daily_WD5 %>%
do(fit = lmer(f1, data = .)) %>%
ungroup() %>%
mutate(ICC = map_dbl(fit, ~icc(.)[[1]]),
var_between = map_dbl(fit, ~get_variance(.)[['var.random']]),
var_within = map_dbl(fit, ~get_variance(.)[['var.residual']])
) %>%
select(-fit) %>%
mutate(method = 'separate_days',
type = 'Weekdays',
resolution = resolution)
result <- bind_rows(m1_1, m1_2, m1_3, m1_4, m2_1, m2_2, m2_3, m2_4, m3_1, m3_2, m3_3, m3_4)
result <- result %>% mutate(variable = as.character(var))
return(result)
}
}
#create datasets for each GGIR variable
two_week_datasets_GGIR <- map(varnames, ~create_2week_datasets_ICC_multi(part7select, var = ., day_length = 1))
#save datasets
save(two_week_datasets_GGIR, file = 'datasets_log/2week_ICC/two_week_datasets_GGIR.RData')
load('datasets_log/2week_ICC/two_week_datasets_GGIR.RData')
#run ICC
results_mean_tod_GGIR <- map2(two_week_datasets_GGIR, varnames,
~ICC_3methods_WE_WD_multi(weekdata = .x, var = .y, 1440, timeofday = F))
ICC_GGIR <- do.call(rbind, results_mean_tod_GGIR)
ICC_GGIR_week_mean <- ICC_GGIR %>% filter(method == "week_mean")
ICC_GGIR_week_mean <- ICC_GGIR_week_mean %>%
group_by(type) %>%
arrange(ICC, .by_group = T)
arranged_names <- unique(ICC_GGIR_week_mean$variable)
#ICC_GGIR_week_mean <- ICC_GGIR_week_mean %>% filter(variable %in% arranged_names[4:12])
ICC_GGIR_week_mean$variable <- factor(ICC_GGIR_week_mean$variable, levels = unique(ICC_GGIR_week_mean$variable))
ICC_GGIR_week_mean$variable <- fct_rev(ICC_GGIR_week_mean$variable)
ggplot(data = ICC_GGIR_week_mean, aes(x = variable, y = ICC, color = type, group = type)) +
geom_line(linetype = 2) +
geom_point() +
scale_y_continuous(breaks = seq(0.1, 1, 0.1), limits = c(0,1)) +
scale_x_discrete(labels = c('MVPA_dur', 'TAC', 'slp_eff', 'mod_dur', 'vig_dur', 'sed_dur', 'SATP', 'RA', 'light_dur', 'ASTP', 'slp_dur', 'TLAC', 'slp_midpoint')) +
theme_minimal(base_size = 14) +
theme(panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
legend.title = element_blank(),
axis.text.x = element_text(angle = 45)) +
xlab('') +
ggtitle('ICC of day level summary measures')
ggsave(filename = 'plots/fig6_B.png', height = 5, width = 8)
Figure 6B: Reliability results of week 1 vs week 2 test/retest for day-level summary metrics and different resolution ENMO data. Prior to analysis, the sample was subsetted to participants with at least two full weeks with greater than 95% wear (n=354). All plots are grouped by all days, weekdays, Tues/Wed, Sat/Sun. ICC of day-level summary metrics averaged across the days of the week for each subset.
ICC_7_day_combo_GGIR <- function(dataset, combo_num, varname) {
weekdays <- c('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
week1 <- combn(weekdays, combo_num)
week2 <- combn(weekdays, combo_num)
week1 = split(week1, rep(1:ncol(week1), each = nrow(week1)))
week2 = split(week2, rep(1:ncol(week2), each = nrow(week2)))
combo_data <- tibble(week1 = week1, week2 = week2, ICC = NA)
combo_data <- combo_data %>%
complete(week1, week2) %>%
mutate(days = combo_num,
var = varname)
#now iterate through each row of combo data
combo_data$ICC <- map2(.x = combo_data$week1, .y = combo_data$week2, ~ICC_by_row_GGIR(dataset, varname, .x, .y))
return(combo_data)
}
ICC_by_row_GGIR <- function(dataset, varname, week1, week2) {
var = as.symbol(varname)
rowdata <- dataset
rowdata1 <- rowdata %>% filter(week == 1 & weekday %in% week1)
rowdata2 <- rowdata %>% filter(week == 2 & weekday %in% week2)
rowdata <- rbind(rowdata1, rowdata2)
#take average of all days in the week
rowdata <- rowdata %>%
group_by(ID, week, minute) %>%
summarise(!!var := mean(!!var)) %>%
ungroup()
f1 = as.formula(paste0(var, "~ 1 + (1 | ID)"))
fit <- lmer(f1, data = rowdata)
message(paste0(varname, ": ", unlist(week1), " vs ", unlist(week2)))
return(icc(fit)[[1]])
}
#7 day ICC plot for 1440 resolution acceleration data
ICC_7_day_combo_results <- ICC_7_day_combo_GGIR_results[[1]]
weekdays <- c('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
ICC_7_day_combo_results <- ICC_7_day_combo_results %>%
mutate(ICC = as.numeric(ICC),
days = as.numeric(days),
week1 = factor(week1, levels = weekdays),
week2 = factor(week2, levels = weekdays))
#density plot
ggplot(data = ICC_7_day_combo_results %>% mutate(days = as.factor(days)), aes(x = ICC, color = days, group = days, fill = days)) +
geom_density(alpha = 0.2) +
#geom_boxplot(width = 0.5) +
#scale_y_continuous(breaks = seq(0.2, 1, 0.1)) +
#scale_x_discrete(labels = c('acc_day', 'dur_vig','dur_mod', 'M5_val', 'dur_lig', 'dur_in', 'slp_onset', 'slp_dur','slp_eff')) +
theme_minimal(base_size = 12) +
labs(fill = 'Days per week') +
scale_color_discrete(guide = F) +
ggtitle('ICC of day level TAC')
ggsave(filename = 'plots/7days_density.png', height = 4, width = 6)
#violin plot
ggplot(data = ICC_7_day_combo_results, aes(y = ICC, x = days, group = NULL)) +
stat_summary(fun = mean, geom="point",
position=position_dodge(width=0.75), size = 0.75, color = '#00BFC4') +
#stat_summary(fun.data = mean_se, geom="errorbar",
# position=position_dodge(width=0.75), size = 0.25) +
geom_violin(aes(group = days), fill = '#00BFC4', color = '#00BFC4', alpha = 0.1) +
stat_summary(fun = mean, geom="line", linetype = 2, color = '#00BFC4') +
#geom_boxplot(width = 0.5) +
#scale_y_continuous(breaks = seq(0.2, 1, 0.1)) +
scale_x_continuous(breaks = c(1:7)) +
theme_minimal(base_size = 14) +
theme(legend.position = 'none') +
xlab('Days per week') +
ggtitle('ICC of day level TAC')
ggsave(filename = 'plots/fig_6A.png', height = 4, width = 6)
Figure 6A: ICC of day level mean ENMO as a function of the number of days used in test/retest. The distributions show ICC calculated for all possible combinations of weekdays for the number of days used.
#GGIR features
ICC_7_day_combo_GGIR_results <- do.call('rbind', ICC_7_day_combo_GGIR_results)
ICC_7_day_combo_GGIR_results <- ICC_7_day_combo_GGIR_results %>% mutate(ICC = as.numeric(ICC))
ggir_mean <- ICC_7_day_combo_GGIR_results %>%
group_by(var) %>%
summarise(ICC = mean(ICC, na.rm = T)) %>%
arrange(desc(ICC))
mean_order = ggir_mean$var
ICC_7_day_combo_GGIR_results$var <- factor(ICC_7_day_combo_GGIR_results$var, levels = mean_order)
#facet violin plot
ICC_7_day_combo_GGIR_results <- ICC_7_day_combo_GGIR_results %>%
mutate(var = factor(var, levels = rev(arranged_names)))
#density plot
ggplot(data = ICC_7_day_combo_GGIR_results %>% mutate(days = as.factor(days)), aes(x = ICC, color = days, fill = days)) +
geom_density(alpha = 0.2) +
#geom_boxplot(width = 0.5) +
#scale_y_continuous(breaks = seq(0.2, 1, 0.1)) +
scale_x_continuous(breaks = seq(0,1,0.2), limits = c(0,1)) +
theme_minimal(base_size = 8) +
labs(fill = 'Days per week') +
scale_color_discrete(guide = F) +
ggtitle('ICC of day level GGIR summaries') +
facet_wrap(~var)
ggsave(filename = 'plots/7days_all_features_density.png', height = 5, width = 10)
#violin
ggplot(data = ICC_7_day_combo_GGIR_results, aes(x = days, y = ICC, color = days, fill = days)) +
stat_summary(fun = mean, geom="point",
position=position_dodge(width=0.75), size = 0.2) +
geom_violin(aes(group = days), alpha = 0.3, size = 0.3) +
#geom_boxplot(width = 0.5) +
scale_x_continuous(breaks = 1:7) +
scale_y_continuous(breaks = seq(0,1,0.2), limits = c(0,1)) +
theme_minimal(base_size = 8) +
labs(fill = 'Days per week') +
scale_color_continuous(guide = F) +
scale_fill_continuous(guide = F) +
ggtitle('ICC of day level GGIR summaries') +
facet_wrap(~var)
ggsave(filename = 'plots/7days_all_features_violin.png', height = 5, width = 8)
#line plot
ggplot(data = ICC_7_day_combo_GGIR_results, aes(y = ICC, x = days, color = var)) +
stat_summary(fun = mean, geom="point", size = 0.75) +
#stat_summary(fun.data = mean_se, geom="errorbar",
# position=position_dodge(width=0.75), size = 0.25) +
#geom_violin(aes(group = days, fill = as.factor(days), color = as.factor(days)), alpha = 0.1) +
stat_summary(fun = mean, geom="line", linetype = 2) +
#geom_boxplot(width = 0.5) +
#scale_y_continuous(breaks = seq(0.2, 1, 0.1)) +
scale_x_continuous(breaks = c(1:7)) +
scale_y_continuous(breaks = seq(0,1,0.1)) +
theme_minimal(base_size = 14) +
#theme(legend.position = 'none') +
xlab('Days per week') +
ggtitle('ICC of day level GGIR summaries')
ggsave(filename = 'plots/7days_all_features_line.png', height = 5, width = 8)
These are variables from postGGIR output, part7b_HBN_all_features_2_dayclean.csv.
ggplot(part7_daysummary, aes(x = SleepDurationInSpt)) +
geom_histogram(bins = 100, color = 'blue', fill = 'blue', alpha = 0.5)
ggsave('plots/postGGIR_sleepdur.png', height = 4, width = 6)
ggplot(part7_daysummary, aes(x = sleep_efficiency)) +
geom_histogram(bins = 100, color = 'blue', fill = 'blue', alpha = 0.5)
ggsave('plots/postGGIR_sleepeff.png', height = 4, width = 6)