library(gamlss)
library(ggsci)
library(viridis)
library(car)
library(sjPlot)
library(mosaic)
library(tidyverse)

Data Prep

#get physical activity person summaries from GGIR part 5
part5 <- read_csv('GGIR_summaries/part5_personsummary_WW_L50M125V500_T5A5.csv')
plotdata <- part5 %>% dplyr::select(ID, ACC_day_mg_pla, ACC_spt_mg_pla, ACC_day_spt_mg_pla, dur_day_total_IN_min_pla, dur_day_total_LIG_min_pla,
                             dur_day_total_MOD_min_pla, dur_day_total_VIG_min_pla)

Load in GGIR Part5 subject level summaries, picked out these physical activity variables of interest:

Variable name Description
ACC_day_mg_pla Average acceleration for activity outside sleep window across all days, calculated from 5s epoch data
ACC_spt_mg_pla Average acceleration for activity within sleep window across all days, calculated from 5s epoch data
ACC_day_spt_mg_pla Average acceleration across entire period of wear across all days, calculated from 5s epoch data
dur_day_total_IN_min_pla Average duration spent in inactivity (<50mg) per day outside of sleep window
dur_day_total_LIG_min_pla Average duration spent in light activity (50-125mg) per day outside of sleep window
dur_day_total_MOD_min_pla Average duration spent in moderate activity (125-500mg) per day outside of sleep window
dur_day_total_VIG_min_pla Average duration spent in vigorous activity (>500mg) within a day outside of sleep window


Make MVPA duration variable by adding moderate and vigorous duration variables:

#create MVPA duration variable
plotdata <- plotdata %>% mutate(dur_day_total_MVPA = dur_day_total_MOD_min_pla + dur_day_total_VIG_min_pla)

Load and join basic phenotypic variables and pubertal status:

#get age and sex from HBN demo data
demo <- read_csv('pheno/Basic_Demos.csv')
demo <- demo %>% select(EID, Age, Sex) %>% rename(ID = EID)

#get pubertal status from male and female datasets
pubertal_m <- read_csv('pheno/PPS_M_20200814.csv', na = c('NULL'))
pubertal_f <- read_csv('pheno/PPS_F_20200814.csv', na = c('NULL'))

pubertal <- bind_rows(pubertal_m, pubertal_f)
pubertal <- pubertal %>% rename(ID = EID)

#join age, sex, pubertal status to activity data
plotdata <- left_join(plotdata, demo)
plotdata <- left_join(plotdata, pubertal)

Checking sample sizes:

909 subjects passed GGIR part5.

missing_demo <- plotdata %>% filter(is.na(Sex))
plotdata <- filter(plotdata, ! ID %in% missing_demo$ID)

866 of these subjects have sex and age variables.

missing_puberty <- plotdata %>% filter(is.na(PPS_M_Score) & is.na(PPS_F_Score))
plotdata <- filter(plotdata, ! ID %in% missing_puberty$ID)

694 of these subjects have sex, age, and pubertal status variables.

#recode sex variable into character
plotdata <- plotdata %>%
  mutate(
    Sex = as.factor(Sex),
    Sex = recode(Sex, `0` = 'Male', `1` = 'Female'))

Checking age and sex in sample:

table(plotdata$Sex)
## 
##   Male Female 
##    439    255
#554M 313F (63.9% male)

summary(plotdata$Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.036   7.746   9.836  10.456  12.737  21.419
#Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#5.036   7.535   9.510  10.293  12.481  21.482 

Subset data to subjects who have at least 3 days of 95% wear:

#load in 3 days of 95% wear data
act10 <- read_csv('HBN_actigraphy_600s_95wear_3day.csv')
goodIDs <- unique(act10$ID)

#filter to subjects with at least 3 days of 95% wear
plotdata <- filter(plotdata, ID %in% goodIDs)

519 of subjects have sex, age, and pubertal status, and 3 days of 95% wear. This sample is used for all further analysis shown below.

Check how age and sex compares in this sample.

table(plotdata$Sex)
## 
##   Male Female 
##    329    190
#329M #190F (63.4% male)

summary(plotdata$Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.036   7.683   9.516  10.275  12.427  21.419
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#5.036   7.683   9.516  10.275  12.427  21.419 

Recode pubertal status (males)

Following guidelines on data dictionary:

  • 3=Prepubertal
  • 4,5=Beginning Pubertal
  • 6,7,8=Mid-Pubertal
  • 9,10,11=Advanced Pubertal
  • 12=Postpubertal


Females are already on a 1-5 scale.

plotdata <- plotdata %>%
  mutate(PPS_M_Score = ifelse(PPS_M_Score == 3, 1, 
                              ifelse(PPS_M_Score %in% c(4,5), 2,
                                     ifelse(PPS_M_Score %in% c(6,7,8), 3,
                                            ifelse(PPS_M_Score %in% c(9,10, 11), 4,
                                                   ifelse(PPS_M_Score == 12, 5, NA))))),
         PPS_Score_Combined = ifelse(is.na(PPS_M_Score), PPS_F_Score, PPS_M_Score))

mean activity count (mAC) variable

Made a mistake earlier, I defined mLAC as log(1 + ACC_day_mg_pla). However, log(mean(X)) =/= mean(log(X)). Can’t recover mean(log(X)) from mean(X).

ACC_day_mg_pla (renamed as mAC for “mean activity count”) is already normally distributed, no need to log transform.

hist(plotdata$ACC_day_mg_pla)

Some notes about choice of activity count variable:

TLAC value depends on the epoch the person uses to sum. So if he uses 5 second means, the value would be 12 times as great as if he used 60s means.

Note however, that mean AC calculated for the whole day using 5 second and 60 second epochs would be the same.

Outliers

Function to identify outliers based on median +/- threshold * IQR

find_outliers <- function(x, threshold = 2.22) {
  
  qtiles = summary(x)
  
  median = qtiles[[3]]
  iqr = qtiles[[5]] - qtiles[[2]]
  
  print(paste0('median = ', median))
  print(paste0('iqr = ', iqr))
  
  lb <- median - threshold * iqr
  ub <- median + threshold * iqr
  
  print(paste0('lowerbound = ', lb, ', upperbound = ', ub))
  
  return(ifelse(x > ub | x < lb, 1, 0))
  
  
}
mAC

Remove outliers for mAC based on median +/- 3 * IQR:

age_mAC <- plotdata %>% 
  dplyr::select(ID, Sex, Age, ACC_day_mg_pla, PPS_M_Score, PPS_F_Score, PPS_Score_Combined) %>%
  mutate(mAC = ACC_day_mg_pla,
         mAC_outliers = find_outliers(mAC, 3))
## [1] "median = 57.8642203"
## [1] "iqr = 27.5556727"
## [1] "lowerbound = -24.8027978, upperbound = 140.5312384"
age_mAC_outliers <- age_mAC %>% filter(mAC_outliers == 1)
age_mAC_cleaned <- age_mAC %>% filter(mAC_outliers == 0)

Found and removed 1 outlier.

Exploratory Plots

Distributions

mAC
ggplot(data = age_mAC_cleaned, aes(mAC, fill = Sex, color = Sex)) +
  geom_histogram(aes(y=..density..), alpha = 0.5) +
  geom_density(linetype = 2, alpha = 0.5) +
  theme_minimal(base_size = 12) +
  scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  ggtitle('mAC Distributions') +
  facet_wrap(vars(Sex)) 

Age
ggplot(data = age_mAC_cleaned, aes(Age, fill = Sex, color = Sex)) +
  geom_histogram(aes(y=..density..), alpha = 0.5) +
  geom_density(linetype = 2, alpha = 0.5) +
  theme_minimal(base_size = 12) +
  scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  ggtitle('Age Distributions') +
  facet_wrap(vars(Sex)) 

PPS score
ggplot(data = age_mAC_cleaned, aes(PPS_Score_Combined, fill = Sex, color = Sex)) +
  geom_histogram(alpha = 0.5) +
  #geom_density(linetype = 2, alpha = 0.5) +
  theme_minimal(base_size = 12) +
  scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  ggtitle('PPS Distributions') +
  facet_wrap(vars(Sex), scales = 'free_y') 

Age vs PPS score

Violin with quantiles
age_pps_lines <- age_mAC_cleaned %>%
  group_by(Sex, PPS_Score_Combined) %>%
  summarise(`5` = quantile(Age)[[1]],
            `25` = quantile(Age)[[2]],
            `50` = quantile(Age)[[3]],
            `75` = quantile(Age)[[4]],
            `95` = quantile(Age)[[5]],
            )
age_pps_lines <- age_pps_lines %>% 
  tidyr::gather(3:7, key = Percentile, value = Age) %>%
  mutate(Percentile = factor(Percentile, ordered = T, levels = c(5,25,50,75,95)))

ggplot(data = age_mAC_cleaned, aes(x = as.character(PPS_Score_Combined), y = Age, fill = Sex, color = Sex)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.25, alpha = 0.7) +
  geom_line(data = age_pps_lines, aes(x = PPS_Score_Combined, y = Age, linetype = Percentile, group = Percentile), color = 'black') +
  theme_minimal(base_size = 12) +
  scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  ggtitle('PPS vs Age') +
  facet_wrap(vars(Sex), scales = 'free_y') +
  xlab('PPS_Score_Combined')

GAMLSS
#function to run LMS regression, then extract and append fitted percentile values into current dataframe
tidy_cent <- function(data, xvar, yvar, cent) {
  
  f1 = as.formula(paste0(yvar, "~pb(", xvar, ")"))
  f2 = as.formula(paste0("~pb(", xvar, ")"))
  
  #run lms.bct quantile regression
  model = gamlss(f1, sigma.formula = f2, family=BCT, data=data) 
  
  xvar = data[[xvar]]
  oxvar = xvar[order(xvar)]
  
  #extract fitted values for each quantile
  #qtiles <- cent %>% map_dfc(setNames, object = list(numeric()))
  qtiles = vector("list", length(cent))
  names(qtiles) = cent
  
  for (i in cent) {
    
    qfit = qBCT(i/100, 
                mu = fitted(model, "mu")[order(xvar)], 
                sigma = fitted(model, "sigma")[order(xvar)], 
                nu = fitted(model, "nu")[order(xvar)], 
                tau = fitted(model, "tau")[order(xvar)])
    
    qtiles[[as.character(i)]] <- qfit
    
  
  }
  
  qtile_df = bind_cols(qtiles)
  
  ID_df = tibble(ID = data$ID[order(xvar)])
  
  #dataframe with fitted quantile values
  result = cbind(ID_df, qtile_df)
  
  #append to current dataframe
  return(left_join(data, result, by = 'ID'))
  
}


age_PPS <- age_mAC_cleaned %>% select(ID, Sex, Age, PPS_Score_Combined)

age_PPS_male <- tidy_cent(filter(age_PPS, Sex == 'Male'), xvar = "PPS_Score_Combined", yvar = "Age", cent = c(5, 10, 25, 50, 75, 90, 95))
## GAMLSS-RS iteration 1: Global Deviance = 1354.898 
## GAMLSS-RS iteration 2: Global Deviance = 1349.733 
## GAMLSS-RS iteration 3: Global Deviance = 1348.92 
## GAMLSS-RS iteration 4: Global Deviance = 1348.809 
## GAMLSS-RS iteration 5: Global Deviance = 1348.795 
## GAMLSS-RS iteration 6: Global Deviance = 1348.79 
## GAMLSS-RS iteration 7: Global Deviance = 1348.79
age_PPS_female <- tidy_cent(filter(age_PPS, Sex == 'Female'), xvar = "PPS_Score_Combined", yvar = "Age", cent = c(5, 10, 25, 50, 75, 90, 95))
## GAMLSS-RS iteration 1: Global Deviance = 749.5505 
## GAMLSS-RS iteration 2: Global Deviance = 749.0072 
## GAMLSS-RS iteration 3: Global Deviance = 749.1831 
## GAMLSS-RS iteration 4: Global Deviance = 749.4601 
## GAMLSS-RS iteration 5: Global Deviance = 749.705 
## GAMLSS-RS iteration 6: Global Deviance = 749.9052 
## GAMLSS-RS iteration 7: Global Deviance = 750.0645 
## GAMLSS-RS iteration 8: Global Deviance = 750.1731 
## GAMLSS-RS iteration 9: Global Deviance = 750.1927 
## GAMLSS-RS iteration 10: Global Deviance = 750.1948 
## GAMLSS-RS iteration 11: Global Deviance = 750.1952
age_PPS_long <- rbind(age_PPS_male, age_PPS_female) %>%
  select(-Age) %>%
  gather(4:10, key = percentile, value = Age) %>%
  mutate(percentile = factor(percentile, levels = c(5, 10, 25, 50, 75, 90, 95)))

ggplot() + 
  geom_point(data = age_PPS, aes(x = PPS_Score_Combined, y = Age, color = Sex), position = position_jitter(), alpha = 0.5, size = 1) +
  geom_line(data = age_PPS_long, aes(x = PPS_Score_Combined, y = Age, color = Sex, linetype = percentile), size = 0.7) +
  theme_minimal(base_size = 12) +
  guides(colour = guide_legend(reverse=T),
         linetype = guide_legend(reverse=T)) +
  scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  facet_wrap(~Sex, scales = 'free_x') +
  ggtitle('PPS vs Age') 

Help!!

Loess Smoother

Trying another smoother.

ggplot(data = age_mAC_cleaned, aes(x = PPS_Score_Combined, y = Age, fill = Sex, color = Sex)) +
  geom_point(position = position_jitter(), alpha = 0.5, size = 1) +
  geom_smooth(method = 'loess', formula = y~x) +
  theme_minimal(base_size = 12) +
  scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  ggtitle('PPS vs Age') +
  facet_wrap(vars(Sex), scales = 'free_y') 

Age vs mAC (colored by pubertal status)

age_mAC_plot <- age_mAC_cleaned %>% select(ID, Sex, Age, PPS_Score_Combined, mAC)

age_mAC_plot_male <- tidy_cent(filter(age_mAC_plot, Sex == 'Male'), xvar = "Age", yvar = "mAC", cent = c(5, 10, 25, 50, 75, 90, 95))
## GAMLSS-RS iteration 1: Global Deviance = 2737.05 
## GAMLSS-RS iteration 2: Global Deviance = 2731.531 
## GAMLSS-RS iteration 3: Global Deviance = 2730.774 
## GAMLSS-RS iteration 4: Global Deviance = 2730.805 
## GAMLSS-RS iteration 5: Global Deviance = 2730.814 
## GAMLSS-RS iteration 6: Global Deviance = 2730.815 
## GAMLSS-RS iteration 7: Global Deviance = 2730.816
age_mAC_plot_female <- tidy_cent(filter(age_mAC_plot, Sex == 'Female'), xvar = "Age", yvar = "mAC", cent = c(5, 10, 25, 50, 75, 90, 95))
## GAMLSS-RS iteration 1: Global Deviance = 1464.543 
## GAMLSS-RS iteration 2: Global Deviance = 1464.83 
## GAMLSS-RS iteration 3: Global Deviance = 1465.385 
## GAMLSS-RS iteration 4: Global Deviance = 1465.752 
## GAMLSS-RS iteration 5: Global Deviance = 1466.048 
## GAMLSS-RS iteration 6: Global Deviance = 1466.151 
## GAMLSS-RS iteration 7: Global Deviance = 1466.165 
## GAMLSS-RS iteration 8: Global Deviance = 1466.17 
## GAMLSS-RS iteration 9: Global Deviance = 1466.171 
## GAMLSS-RS iteration 10: Global Deviance = 1466.172
age_mAC_plot_long <- rbind(age_mAC_plot_male, age_mAC_plot_female) %>%
  select(-mAC) %>%
  gather(5:11, key = percentile, value = mAC) %>%
  mutate(percentile = factor(percentile, levels = c(5, 10, 25, 50, 75, 90, 95)))

age_mAC_plot <- age_mAC_plot %>%
  mutate(PPS_Score_Combined = factor(PPS_Score_Combined, ordered = T))

p1 <- ggplot() + 
  geom_point(data = age_mAC_plot, aes(x = Age, y = mAC, fill = PPS_Score_Combined), shape = 21, stroke = 0, alpha = 0.6, size = 3 ) +
  geom_line(data = age_mAC_plot_long, aes(x = Age, y = mAC, linetype = percentile), size = 0.7) +
  theme_minimal(base_size = 18) +
  guides(colour = guide_legend(reverse=T),
         linetype = guide_legend(reverse=T)) +
  scale_fill_viridis(option = 'plasma', discrete = T) +
  facet_wrap(~ Sex) +
  ggtitle('Quantile curves for age vs mAC') +
  guides(fill = guide_legend(order=1, title.theme = element_text(size = 12)),
         linetype = guide_legend(order=2,title.theme = element_text(size = 12))) +
  theme(plot.title = element_text(vjust = 4),
        plot.margin = unit(c(1,0.1,0.1,0.1), "cm"))

ggsave('plots/mAC_Age_Pubertal.png', p1, width = 12, height = 5.5)

Pubertal status vs mAC

mAC_pps_lines <- age_mAC_cleaned %>%
  group_by(Sex, PPS_Score_Combined) %>%
  summarise(`5` = quantile(mAC)[[1]],
            `25` = quantile(mAC)[[2]],
            `50` = quantile(mAC)[[3]],
            `75` = quantile(mAC)[[4]],
            `95` = quantile(mAC)[[5]],
            )
mAC_pps_lines <- mAC_pps_lines %>% 
  tidyr::gather(3:7, key = Percentile, value = mAC) %>%
  mutate(Percentile = factor(Percentile, ordered = T, levels = c(5,25,50,75,95)))

p2 <- ggplot(data = age_mAC_cleaned, aes(x = as.character(PPS_Score_Combined), y = mAC, fill = as.character(PPS_Score_Combined))) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.25, alpha = 0.7) +
  geom_line(data = mAC_pps_lines, aes(x = PPS_Score_Combined, y = mAC, linetype = Percentile, group = Percentile), color = 'black') +
  theme_minimal(base_size = 18) +
  scale_fill_viridis(option = 'plasma', discrete = T) +
  ggtitle('PPS vs mAC') +
  facet_wrap(vars(Sex)) +
  guides(fill = FALSE,
         linetype = guide_legend(order=2,title.theme = element_text(size = 12))) +
  xlab('PPS_Score_Combined')

ggsave('plots/mAC_Pubertal.png', p2, width = 12, height = 5.5)

ADHD Combined Type

Distributions
#load in consensensus diagnosis data
diagnosis <- read_csv('pheno/Diagnosis_ClinicianConsensus.csv')

#create dummy var for ADHD_c
diagnosis$ADHD_c <- ifelse(apply(diagnosis, 1, function(x) any(x %in% "ADHD-Combined Type")), 1, 0) 

#Pull vars and join with activity dataset
diagnosis <- diagnosis %>% 
  select(EID, ADHD_c) %>%
  rename(ID = EID)

age_mAC_cleaned <- left_join(age_mAC_cleaned, diagnosis)

age_mAC_cleaned %>%
  with(., table(Sex, ADHD_c)) %>%
  as_data_frame() %>%
  ggplot(aes(x = Sex, y = n, fill = ADHD_c, color = ADHD_c)) +
  geom_col(alpha = 0.5, position = 'stack') +
  #geom_density(linetype = 2, alpha = 0.5) +
  theme_minimal(base_size = 14) +
  scale_fill_manual(values = c("#0D0887FF", "#CC4678FF")) +
  scale_color_manual(values = c("#0D0887FF", "#CC4678FF")) +
  ggtitle('ADHD Combined Distribution (Sex)')

#ADHD_c by PPS

age_mAC_cleaned %>%
  with(., table(Sex, PPS_Score_Combined, ADHD_c)) %>%
  as_data_frame() %>%
  ggplot(aes(x = PPS_Score_Combined, y = n, fill = ADHD_c, color = ADHD_c)) +
  geom_col(alpha = 0.5, position = 'stack') +
  #geom_density(linetype = 2, alpha = 0.5) +
  theme_minimal(base_size = 14) +
  scale_fill_manual(values = c("#0D0887FF", "#CC4678FF")) +
  scale_color_manual(values = c("#0D0887FF", "#CC4678FF")) +
  facet_wrap(~Sex, scales = 'free') +
  ggtitle('ADHD Combined Distribution PPS')

NOTE: y-axis scale is free

by Age and Sex
age_mAC_cleaned$ADHD_c <- as.factor(age_mAC_cleaned$ADHD_c)

p3 <- ggplot(data = age_mAC_cleaned, aes(x = Age, y = mAC, fill = ADHD_c, color = ADHD_c)) +
  geom_point(shape = 21, stroke = 0, alpha = 0.4, size = 3 ) +
  #geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
  geom_smooth(method = 'loess', size = 1.5, alpha = 0.1) +
  theme_minimal(base_size = 18) +
  scale_fill_manual(values = c("#0D0887FF", "#CC4678FF")) +
  scale_color_manual(values = c("#0D0887FF", "#CC4678FF")) +
  ggtitle('ADHD_c vs Age vs Sex') +
  facet_wrap(~Sex) 

ggsave('plots/mAC_Age_ADHD_c.png', p3, width = 12, height = 5.5)

by PPS and Sex
mAC_pps_lines <- age_mAC_cleaned %>%
  group_by(Sex, ADHD_c, PPS_Score_Combined) %>%
  summarise(`5` = quantile(mAC)[[1]],
            `25` = quantile(mAC)[[2]],
            `50` = quantile(mAC)[[3]],
            `75` = quantile(mAC)[[4]],
            `95` = quantile(mAC)[[5]],
            )
mAC_pps_lines <- mAC_pps_lines %>% 
  tidyr::gather(4:8, key = Percentile, value = mAC) %>%
  mutate(Percentile = factor(Percentile, ordered = T, levels = c(5,25,50,75,95)))

p4 <- ggplot(data = age_mAC_cleaned, aes(x = as.character(PPS_Score_Combined), y = mAC, fill = ADHD_c)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.25, alpha = 0.7) +
  geom_line(data = mAC_pps_lines, aes(x = PPS_Score_Combined, y = mAC, linetype = Percentile, group = Percentile), color = 'black') +
  theme_minimal(base_size = 18) +
  scale_fill_manual(values = c("#0D0887FF", "#CC4678FF")) +
  ggtitle('ADHD_c vs PPS vs mAC') +
  facet_grid(Sex ~ ADHD_c) +
  guides(linetype = guide_legend(order=2,title.theme = element_text(size = 12)),
         fill = guide_legend(title.theme = element_text(size = 12))) +
  xlab('PPS_Score_Combined')


ggsave('plots/mAC_PPS_ADHD_c.png', p4, width = 12, height = 8)

SWAN_Hyperactive

Distributions
swan <- read_csv('pheno/SWAN.csv')

#Pull vars and join with activity dataset
swan <- swan %>% 
  select(EID, SWAN_IN, SWAN_HY, SWAN_Total) %>%
  rename(ID = EID)

age_mAC_cleaned <- left_join(age_mAC_cleaned, swan)

#distribution by sex

ggplot(data = age_mAC_cleaned, aes(SWAN_HY, fill = Sex, color = Sex)) +
  geom_histogram(aes(y=..density..), alpha = 0.5) +
  geom_density(linetype = 2, alpha = 0.5) +
  theme_minimal(base_size = 12) +
  scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
  ggtitle('SWAN_HY Distributions (Sex)') +
  facet_wrap(~Sex) 

p5 <- ggplot(data = age_mAC_cleaned, aes(x = Age, y = SWAN_HY, fill = Sex, color = Sex)) +
  geom_point(shape = 21, stroke = 0, alpha = 0.4, size = 3 ) +
  #geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
  geom_smooth(method = 'loess', size = 1.5, alpha = 0.1) +
  theme_minimal(base_size = 18) +
  scale_fill_manual(values = c("#0D0887FF", "#CC4678FF")) +
  scale_color_manual(values = c("#0D0887FF", "#CC4678FF")) +
  ggtitle('SWAN_HY vs Age vs Sex') +
  facet_wrap(~Sex) 

ggsave('plots/Sex_Age_SWAN_HY.png', p5, width = 12, height = 5.5)

#bin into tertiles

my_ntiles <- function (x, n = 3, digits = 3) 
  
{
  x <- as.numeric(x)
  xrank <- rank(x, na.last = TRUE, ties.method = "first")
  xrank[is.na(x)] <- NA
  size <- max(xrank, na.rm = TRUE)
  
  if(size[[1]] < n + 1) {res <- rep(NA, length(x))}
  
  else{
    
    cts <- floor(seq(0, size, length.out = (n + 1)))
    bin <- as.numeric(cut(xrank, breaks = cts))
    left <- mosaic::min(x ~ bin, na.rm = TRUE)
    right <- mosaic::max(x ~ bin, na.rm = TRUE)
    res <- factor(bin, labels = paste0("Q", mosaic::max(bin ~ bin, na.rm = T)," [", round(left, digits = digits), " to ", round(right, digits = digits), "]"), ordered = TRUE)
  }
  
  return(res)
}

age_mAC_cleaned <- age_mAC_cleaned %>% mutate(SWAN_HY_q = my_ntiles(SWAN_HY, 3, 2))

table(age_mAC_cleaned$SWAN_HY_q, age_mAC_cleaned$Sex)
##                 
##                  Male Female
##   Q1 [-3 to 0]     91     80
##   Q2 [0 to 0.67]  113     58
##   Q3 [0.67 to 3]  122     50
by Age and Sex
p6 <- ggplot(data = filter(age_mAC_cleaned, !is.na(SWAN_HY_q)), aes(x = Age, y = mAC, fill = SWAN_HY_q, color = SWAN_HY_q)) +
  geom_point(shape = 21, stroke = 0, alpha = 0.4, size = 3 ) +
  #geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
  geom_smooth(method = 'loess', size = 1.5, alpha = 0.1) +
  theme_minimal(base_size = 18) +
  scale_fill_manual(values = c("#0D0887FF", "#CC4678FF", "#FDB32FFF")) +
  scale_color_manual(values = c("#0D0887FF", "#CC4678FF", "#FDB32FFF")) +
  ggtitle('SWAN_HY vs Age vs Sex') +
  facet_wrap(~Sex) 

ggsave('plots/mAC_Age_SWAN_HY.png', p6, width = 12, height = 5)

by PPS and Sex
mAC_pps_lines <- age_mAC_cleaned %>%
  filter(!is.na(SWAN_HY_q)) %>%
  group_by(Sex, SWAN_HY_q, PPS_Score_Combined) %>%
  summarise(`5` = quantile(mAC)[[1]],
            `25` = quantile(mAC)[[2]],
            `50` = quantile(mAC)[[3]],
            `75` = quantile(mAC)[[4]],
            `95` = quantile(mAC)[[5]],
            )
mAC_pps_lines <- mAC_pps_lines %>% 
  tidyr::gather(4:8, key = Percentile, value = mAC) %>%
  mutate(Percentile = factor(Percentile, ordered = T, levels = c(5,25,50,75,95)))

p7 <- ggplot(data = filter(age_mAC_cleaned, !is.na(SWAN_HY_q)), aes(x = as.character(PPS_Score_Combined), y = mAC, fill = SWAN_HY_q)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.25, alpha = 0.7) +
  geom_line(data = mAC_pps_lines, aes(x = PPS_Score_Combined, y = mAC, linetype = Percentile, group = Percentile), color = 'black') +
  theme_minimal(base_size = 18) +
  scale_fill_manual(values = c("#0D0887FF", "#CC4678FF", "#FDB32FFF")) +
  ggtitle('SWAN_HY vs PPS vs mAC') +
  facet_grid(Sex ~ SWAN_HY_q) +
  guides(linetype = guide_legend(order=2,title.theme = element_text(size = 12)),
         fill = guide_legend(title.theme = element_text(size = 12))) +
  xlab('PPS_Score_Combined')


ggsave('plots/mAC_PPS_SWAN_HY.png', p7, width = 12, height = 6)

Diurnal Surface Plots

Don’t know how to make these, learn about the models used to generate these coefficient surfaces.

Testing

Full Sample

#model 1

#males
age_mAC_male <- age_mAC_cleaned %>% 
  filter(Sex == 'Male') %>%
  mutate(PPS_M_Score = as.numeric(PPS_M_Score))

#mAC by age
m1 <- lm(mAC ~ Age, data = age_mAC_male)

#mAC by pps
m2 <- lm(mAC ~ PPS_M_Score, data = age_mAC_male)

#both
m3 <- lm(mAC ~ Age + PPS_M_Score, data = age_mAC_male)

vif(m3)
##         Age PPS_M_Score 
##    2.998889    2.998889
#Age PPS interaction
m4 <- lm(mAC ~ Age*PPS_M_Score, data = age_mAC_male)

vif(m4)
##             Age     PPS_M_Score Age:PPS_M_Score 
##        6.370316       17.161635       29.817177
#females
age_mAC_female <- age_mAC_cleaned %>% 
  filter(Sex == 'Female') %>%
  mutate(PPS_F_Score = as.numeric(PPS_F_Score))

#mAC by age
f1 <- lm(mAC ~ Age, data = age_mAC_female)

#mAC by pps
f2 <- lm(mAC ~ PPS_F_Score, data = age_mAC_female)

#both
f3 <- lm(mAC ~ Age + PPS_F_Score, data = age_mAC_female)

vif(f3)
##         Age PPS_F_Score 
##    3.556056    3.556056
#Age PPS interaction
f4 <- lm(mAC ~ Age*PPS_F_Score, data = age_mAC_female)

vif(f4)
##             Age     PPS_F_Score Age:PPS_F_Score 
##        10.40104        13.63360        32.00549
Males
tab_model(m1, m2, m3, m4, collapse.ci = T)
  mAC mAC mAC mAC
Predictors Estimates p Estimates p Estimates p Estimates p
(Intercept) 102.91
(97.16 – 108.66)
<0.001 80.93
(77.23 – 84.64)
<0.001 101.28
(94.95 – 107.60)
<0.001 111.31
(98.94 – 123.68)
<0.001
Age -3.95
(-4.48 – -3.43)
<0.001 -3.49
(-4.41 – -2.58)
<0.001 -4.40
(-5.73 – -3.08)
<0.001
PPS_M_Score -10.21
(-11.92 – -8.50)
<0.001 -1.68
(-4.41 – 1.05)
0.226 -7.26
(-13.77 – -0.75)
0.029
Age * PPS_M_Score 0.44
(-0.03 – 0.90)
0.064
Observations 328 328 328 328
R2 / R2 adjusted 0.400 / 0.398 0.298 / 0.296 0.402 / 0.399 0.409 / 0.403
Females
tab_model(f1, f2, f3, f4, collapse.ci = T)
  mAC mAC mAC mAC
Predictors Estimates p Estimates p Estimates p Estimates p
(Intercept) 83.88
(78.50 – 89.26)
<0.001 69.75
(65.80 – 73.71)
<0.001 83.44
(77.63 – 89.25)
<0.001 83.98
(71.08 – 96.88)
<0.001
Age -3.01
(-3.51 – -2.51)
<0.001 -2.85
(-3.79 – -1.91)
<0.001 -2.91
(-4.53 – -1.29)
<0.001
PPS_F_Score -6.78
(-8.19 – -5.37)
<0.001 -0.50
(-2.95 – 1.95)
0.688 -0.69
(-5.50 – 4.12)
0.777
Age * PPS_F_Score 0.02
(-0.39 – 0.43)
0.927
Observations 190 190 190 190
R2 / R2 adjusted 0.430 / 0.427 0.322 / 0.319 0.431 / 0.425 0.431 / 0.422

Remove age 5-10

#model 1

#males
age_mAC_male <- age_mAC_cleaned %>% 
  filter(Sex == 'Male', Age > 10) %>% 
  mutate(PPS_M_Score = as.numeric(PPS_M_Score))

#mAC by age
m1 <- lm(mAC ~ Age, data = age_mAC_male)

#mAC by pps
m2 <- lm(mAC ~ PPS_M_Score, data = age_mAC_male)

#both
m3 <- lm(mAC ~ Age + PPS_M_Score, data = age_mAC_male)

vif(m3)
##         Age PPS_M_Score 
##    2.232727    2.232727
#Age PPS interaction
m4 <- lm(mAC ~ Age*PPS_M_Score, data = age_mAC_male)

vif(m4)
##             Age     PPS_M_Score Age:PPS_M_Score 
##        10.32328        23.11808        49.29361
#females
age_mAC_female <- age_mAC_cleaned %>% 
  filter(Sex == 'Female', Age > 10) %>%
  mutate(PPS_F_Score = as.numeric(PPS_F_Score))

#mAC by age
f1 <- lm(mAC ~ Age, data = age_mAC_female)

#mAC by pps
f2 <- lm(mAC ~ PPS_F_Score, data = age_mAC_female)

#both
f3 <- lm(mAC ~ Age + PPS_F_Score, data = age_mAC_female)

vif(f3)
##         Age PPS_F_Score 
##    2.188281    2.188281
#Age PPS interaction
f4 <- lm(mAC ~ Age*PPS_F_Score, data = age_mAC_female)

vif(f4)
##             Age     PPS_F_Score Age:PPS_F_Score 
##        21.36514        34.42512        89.05771
Males
tab_model(m1, m2, m3, m4, collapse.ci = T)
  mAC mAC mAC mAC
Predictors Estimates p Estimates p Estimates p Estimates p
(Intercept) 87.96
(76.23 – 99.70)
<0.001 65.06
(59.25 – 70.87)
<0.001 83.61
(70.94 – 96.27)
<0.001 97.76
(65.47 – 130.05)
<0.001
Age -2.95
(-3.81 – -2.08)
<0.001 -2.11
(-3.39 – -0.82)
0.002 -3.27
(-6.04 – -0.50)
0.021
PPS_M_Score -6.08
(-8.08 – -4.07)
<0.001 -2.55
(-5.45 – 0.35)
0.085 -6.79
(-16.14 – 2.56)
0.154
Age * PPS_M_Score 0.34
(-0.37 – 1.04)
0.348
Observations 151 151 151 151
R2 / R2 adjusted 0.232 / 0.227 0.194 / 0.189 0.247 / 0.237 0.252 / 0.237
Females
tab_model(f1, f2, f3, f4, collapse.ci = T)
  mAC mAC mAC mAC
Predictors Estimates p Estimates p Estimates p Estimates p
(Intercept) 65.33
(52.65 – 78.01)
<0.001 52.99
(45.38 – 60.60)
<0.001 63.92
(50.85 – 76.99)
<0.001 77.82
(28.88 – 126.77)
0.002
Age -1.82
(-2.73 – -0.91)
<0.001 -1.37
(-2.72 – -0.02)
0.046 -2.55
(-6.77 – 1.67)
0.233
PPS_F_Score -3.55
(-5.59 – -1.52)
0.001 -1.34
(-4.29 – 1.61)
0.370 -4.69
(-16.45 – 7.07)
0.429
Age * PPS_F_Score 0.28
(-0.67 – 1.23)
0.559
Observations 79 79 79 79
R2 / R2 adjusted 0.171 / 0.160 0.136 / 0.124 0.180 / 0.158 0.184 / 0.151