library(gamlss)
library(ggsci)
library(viridis)
library(car)
library(sjPlot)
library(mosaic)
library(tidyverse)
#get physical activity person summaries from GGIR part 5
part5 <- read_csv('GGIR_summaries/part5_personsummary_WW_L50M125V500_T5A5.csv')
plotdata <- part5 %>% dplyr::select(ID, ACC_day_mg_pla, ACC_spt_mg_pla, ACC_day_spt_mg_pla, dur_day_total_IN_min_pla, dur_day_total_LIG_min_pla,
dur_day_total_MOD_min_pla, dur_day_total_VIG_min_pla)
Load in GGIR Part5 subject level summaries, picked out these physical activity variables of interest:
| Variable name | Description |
|---|---|
| ACC_day_mg_pla | Average acceleration for activity outside sleep window across all days, calculated from 5s epoch data |
| ACC_spt_mg_pla | Average acceleration for activity within sleep window across all days, calculated from 5s epoch data |
| ACC_day_spt_mg_pla | Average acceleration across entire period of wear across all days, calculated from 5s epoch data |
| dur_day_total_IN_min_pla | Average duration spent in inactivity (<50mg) per day outside of sleep window |
| dur_day_total_LIG_min_pla | Average duration spent in light activity (50-125mg) per day outside of sleep window |
| dur_day_total_MOD_min_pla | Average duration spent in moderate activity (125-500mg) per day outside of sleep window |
| dur_day_total_VIG_min_pla | Average duration spent in vigorous activity (>500mg) within a day outside of sleep window |
Make MVPA duration variable by adding moderate and vigorous duration variables:
#create MVPA duration variable
plotdata <- plotdata %>% mutate(dur_day_total_MVPA = dur_day_total_MOD_min_pla + dur_day_total_VIG_min_pla)
Load and join basic phenotypic variables and pubertal status:
#get age and sex from HBN demo data
demo <- read_csv('pheno/Basic_Demos.csv')
demo <- demo %>% select(EID, Age, Sex) %>% rename(ID = EID)
#get pubertal status from male and female datasets
pubertal_m <- read_csv('pheno/PPS_M_20200814.csv', na = c('NULL'))
pubertal_f <- read_csv('pheno/PPS_F_20200814.csv', na = c('NULL'))
pubertal <- bind_rows(pubertal_m, pubertal_f)
pubertal <- pubertal %>% rename(ID = EID)
#join age, sex, pubertal status to activity data
plotdata <- left_join(plotdata, demo)
plotdata <- left_join(plotdata, pubertal)
909 subjects passed GGIR part5.
missing_demo <- plotdata %>% filter(is.na(Sex))
plotdata <- filter(plotdata, ! ID %in% missing_demo$ID)
866 of these subjects have sex and age variables.
missing_puberty <- plotdata %>% filter(is.na(PPS_M_Score) & is.na(PPS_F_Score))
plotdata <- filter(plotdata, ! ID %in% missing_puberty$ID)
694 of these subjects have sex, age, and pubertal status variables.
#recode sex variable into character
plotdata <- plotdata %>%
mutate(
Sex = as.factor(Sex),
Sex = recode(Sex, `0` = 'Male', `1` = 'Female'))
Checking age and sex in sample:
table(plotdata$Sex)
##
## Male Female
## 439 255
#554M 313F (63.9% male)
summary(plotdata$Age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.036 7.746 9.836 10.456 12.737 21.419
#Min. 1st Qu. Median Mean 3rd Qu. Max.
#5.036 7.535 9.510 10.293 12.481 21.482
Subset data to subjects who have at least 3 days of 95% wear:
#load in 3 days of 95% wear data
act10 <- read_csv('HBN_actigraphy_600s_95wear_3day.csv')
goodIDs <- unique(act10$ID)
#filter to subjects with at least 3 days of 95% wear
plotdata <- filter(plotdata, ID %in% goodIDs)
519 of subjects have sex, age, and pubertal status, and 3 days of 95% wear. This sample is used for all further analysis shown below.
Check how age and sex compares in this sample.
table(plotdata$Sex)
##
## Male Female
## 329 190
#329M #190F (63.4% male)
summary(plotdata$Age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.036 7.683 9.516 10.275 12.427 21.419
# Min. 1st Qu. Median Mean 3rd Qu. Max.
#5.036 7.683 9.516 10.275 12.427 21.419
Following guidelines on data dictionary:
Females are already on a 1-5 scale.
plotdata <- plotdata %>%
mutate(PPS_M_Score = ifelse(PPS_M_Score == 3, 1,
ifelse(PPS_M_Score %in% c(4,5), 2,
ifelse(PPS_M_Score %in% c(6,7,8), 3,
ifelse(PPS_M_Score %in% c(9,10, 11), 4,
ifelse(PPS_M_Score == 12, 5, NA))))),
PPS_Score_Combined = ifelse(is.na(PPS_M_Score), PPS_F_Score, PPS_M_Score))
Made a mistake earlier, I defined mLAC as log(1 + ACC_day_mg_pla). However, log(mean(X)) =/= mean(log(X)). Can’t recover mean(log(X)) from mean(X).
ACC_day_mg_pla (renamed as mAC for “mean activity count”) is already normally distributed, no need to log transform.
hist(plotdata$ACC_day_mg_pla)
Some notes about choice of activity count variable:
TLAC value depends on the epoch the person uses to sum. So if he uses 5 second means, the value would be 12 times as great as if he used 60s means.
Note however, that mean AC calculated for the whole day using 5 second and 60 second epochs would be the same.
Function to identify outliers based on median +/- threshold * IQR
find_outliers <- function(x, threshold = 2.22) {
qtiles = summary(x)
median = qtiles[[3]]
iqr = qtiles[[5]] - qtiles[[2]]
print(paste0('median = ', median))
print(paste0('iqr = ', iqr))
lb <- median - threshold * iqr
ub <- median + threshold * iqr
print(paste0('lowerbound = ', lb, ', upperbound = ', ub))
return(ifelse(x > ub | x < lb, 1, 0))
}
Remove outliers for mAC based on median +/- 3 * IQR:
age_mAC <- plotdata %>%
dplyr::select(ID, Sex, Age, ACC_day_mg_pla, PPS_M_Score, PPS_F_Score, PPS_Score_Combined) %>%
mutate(mAC = ACC_day_mg_pla,
mAC_outliers = find_outliers(mAC, 3))
## [1] "median = 57.8642203"
## [1] "iqr = 27.5556727"
## [1] "lowerbound = -24.8027978, upperbound = 140.5312384"
age_mAC_outliers <- age_mAC %>% filter(mAC_outliers == 1)
age_mAC_cleaned <- age_mAC %>% filter(mAC_outliers == 0)
Found and removed 1 outlier.
ggplot(data = age_mAC_cleaned, aes(mAC, fill = Sex, color = Sex)) +
geom_histogram(aes(y=..density..), alpha = 0.5) +
geom_density(linetype = 2, alpha = 0.5) +
theme_minimal(base_size = 12) +
scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
ggtitle('mAC Distributions') +
facet_wrap(vars(Sex))
ggplot(data = age_mAC_cleaned, aes(Age, fill = Sex, color = Sex)) +
geom_histogram(aes(y=..density..), alpha = 0.5) +
geom_density(linetype = 2, alpha = 0.5) +
theme_minimal(base_size = 12) +
scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
ggtitle('Age Distributions') +
facet_wrap(vars(Sex))
ggplot(data = age_mAC_cleaned, aes(PPS_Score_Combined, fill = Sex, color = Sex)) +
geom_histogram(alpha = 0.5) +
#geom_density(linetype = 2, alpha = 0.5) +
theme_minimal(base_size = 12) +
scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
ggtitle('PPS Distributions') +
facet_wrap(vars(Sex), scales = 'free_y')
age_pps_lines <- age_mAC_cleaned %>%
group_by(Sex, PPS_Score_Combined) %>%
summarise(`5` = quantile(Age)[[1]],
`25` = quantile(Age)[[2]],
`50` = quantile(Age)[[3]],
`75` = quantile(Age)[[4]],
`95` = quantile(Age)[[5]],
)
age_pps_lines <- age_pps_lines %>%
tidyr::gather(3:7, key = Percentile, value = Age) %>%
mutate(Percentile = factor(Percentile, ordered = T, levels = c(5,25,50,75,95)))
ggplot(data = age_mAC_cleaned, aes(x = as.character(PPS_Score_Combined), y = Age, fill = Sex, color = Sex)) +
geom_violin(alpha = 0.5) +
geom_boxplot(width = 0.25, alpha = 0.7) +
geom_line(data = age_pps_lines, aes(x = PPS_Score_Combined, y = Age, linetype = Percentile, group = Percentile), color = 'black') +
theme_minimal(base_size = 12) +
scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
ggtitle('PPS vs Age') +
facet_wrap(vars(Sex), scales = 'free_y') +
xlab('PPS_Score_Combined')
#function to run LMS regression, then extract and append fitted percentile values into current dataframe
tidy_cent <- function(data, xvar, yvar, cent) {
f1 = as.formula(paste0(yvar, "~pb(", xvar, ")"))
f2 = as.formula(paste0("~pb(", xvar, ")"))
#run lms.bct quantile regression
model = gamlss(f1, sigma.formula = f2, family=BCT, data=data)
xvar = data[[xvar]]
oxvar = xvar[order(xvar)]
#extract fitted values for each quantile
#qtiles <- cent %>% map_dfc(setNames, object = list(numeric()))
qtiles = vector("list", length(cent))
names(qtiles) = cent
for (i in cent) {
qfit = qBCT(i/100,
mu = fitted(model, "mu")[order(xvar)],
sigma = fitted(model, "sigma")[order(xvar)],
nu = fitted(model, "nu")[order(xvar)],
tau = fitted(model, "tau")[order(xvar)])
qtiles[[as.character(i)]] <- qfit
}
qtile_df = bind_cols(qtiles)
ID_df = tibble(ID = data$ID[order(xvar)])
#dataframe with fitted quantile values
result = cbind(ID_df, qtile_df)
#append to current dataframe
return(left_join(data, result, by = 'ID'))
}
age_PPS <- age_mAC_cleaned %>% select(ID, Sex, Age, PPS_Score_Combined)
age_PPS_male <- tidy_cent(filter(age_PPS, Sex == 'Male'), xvar = "PPS_Score_Combined", yvar = "Age", cent = c(5, 10, 25, 50, 75, 90, 95))
## GAMLSS-RS iteration 1: Global Deviance = 1354.898
## GAMLSS-RS iteration 2: Global Deviance = 1349.733
## GAMLSS-RS iteration 3: Global Deviance = 1348.92
## GAMLSS-RS iteration 4: Global Deviance = 1348.809
## GAMLSS-RS iteration 5: Global Deviance = 1348.795
## GAMLSS-RS iteration 6: Global Deviance = 1348.79
## GAMLSS-RS iteration 7: Global Deviance = 1348.79
age_PPS_female <- tidy_cent(filter(age_PPS, Sex == 'Female'), xvar = "PPS_Score_Combined", yvar = "Age", cent = c(5, 10, 25, 50, 75, 90, 95))
## GAMLSS-RS iteration 1: Global Deviance = 749.5505
## GAMLSS-RS iteration 2: Global Deviance = 749.0072
## GAMLSS-RS iteration 3: Global Deviance = 749.1831
## GAMLSS-RS iteration 4: Global Deviance = 749.4601
## GAMLSS-RS iteration 5: Global Deviance = 749.705
## GAMLSS-RS iteration 6: Global Deviance = 749.9052
## GAMLSS-RS iteration 7: Global Deviance = 750.0645
## GAMLSS-RS iteration 8: Global Deviance = 750.1731
## GAMLSS-RS iteration 9: Global Deviance = 750.1927
## GAMLSS-RS iteration 10: Global Deviance = 750.1948
## GAMLSS-RS iteration 11: Global Deviance = 750.1952
age_PPS_long <- rbind(age_PPS_male, age_PPS_female) %>%
select(-Age) %>%
gather(4:10, key = percentile, value = Age) %>%
mutate(percentile = factor(percentile, levels = c(5, 10, 25, 50, 75, 90, 95)))
ggplot() +
geom_point(data = age_PPS, aes(x = PPS_Score_Combined, y = Age, color = Sex), position = position_jitter(), alpha = 0.5, size = 1) +
geom_line(data = age_PPS_long, aes(x = PPS_Score_Combined, y = Age, color = Sex, linetype = percentile), size = 0.7) +
theme_minimal(base_size = 12) +
guides(colour = guide_legend(reverse=T),
linetype = guide_legend(reverse=T)) +
scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
facet_wrap(~Sex, scales = 'free_x') +
ggtitle('PPS vs Age')
Help!!
Trying another smoother.
ggplot(data = age_mAC_cleaned, aes(x = PPS_Score_Combined, y = Age, fill = Sex, color = Sex)) +
geom_point(position = position_jitter(), alpha = 0.5, size = 1) +
geom_smooth(method = 'loess', formula = y~x) +
theme_minimal(base_size = 12) +
scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
ggtitle('PPS vs Age') +
facet_wrap(vars(Sex), scales = 'free_y')
age_mAC_plot <- age_mAC_cleaned %>% select(ID, Sex, Age, PPS_Score_Combined, mAC)
age_mAC_plot_male <- tidy_cent(filter(age_mAC_plot, Sex == 'Male'), xvar = "Age", yvar = "mAC", cent = c(5, 10, 25, 50, 75, 90, 95))
## GAMLSS-RS iteration 1: Global Deviance = 2737.05
## GAMLSS-RS iteration 2: Global Deviance = 2731.531
## GAMLSS-RS iteration 3: Global Deviance = 2730.774
## GAMLSS-RS iteration 4: Global Deviance = 2730.805
## GAMLSS-RS iteration 5: Global Deviance = 2730.814
## GAMLSS-RS iteration 6: Global Deviance = 2730.815
## GAMLSS-RS iteration 7: Global Deviance = 2730.816
age_mAC_plot_female <- tidy_cent(filter(age_mAC_plot, Sex == 'Female'), xvar = "Age", yvar = "mAC", cent = c(5, 10, 25, 50, 75, 90, 95))
## GAMLSS-RS iteration 1: Global Deviance = 1464.543
## GAMLSS-RS iteration 2: Global Deviance = 1464.83
## GAMLSS-RS iteration 3: Global Deviance = 1465.385
## GAMLSS-RS iteration 4: Global Deviance = 1465.752
## GAMLSS-RS iteration 5: Global Deviance = 1466.048
## GAMLSS-RS iteration 6: Global Deviance = 1466.151
## GAMLSS-RS iteration 7: Global Deviance = 1466.165
## GAMLSS-RS iteration 8: Global Deviance = 1466.17
## GAMLSS-RS iteration 9: Global Deviance = 1466.171
## GAMLSS-RS iteration 10: Global Deviance = 1466.172
age_mAC_plot_long <- rbind(age_mAC_plot_male, age_mAC_plot_female) %>%
select(-mAC) %>%
gather(5:11, key = percentile, value = mAC) %>%
mutate(percentile = factor(percentile, levels = c(5, 10, 25, 50, 75, 90, 95)))
age_mAC_plot <- age_mAC_plot %>%
mutate(PPS_Score_Combined = factor(PPS_Score_Combined, ordered = T))
p1 <- ggplot() +
geom_point(data = age_mAC_plot, aes(x = Age, y = mAC, fill = PPS_Score_Combined), shape = 21, stroke = 0, alpha = 0.6, size = 3 ) +
geom_line(data = age_mAC_plot_long, aes(x = Age, y = mAC, linetype = percentile), size = 0.7) +
theme_minimal(base_size = 18) +
guides(colour = guide_legend(reverse=T),
linetype = guide_legend(reverse=T)) +
scale_fill_viridis(option = 'plasma', discrete = T) +
facet_wrap(~ Sex) +
ggtitle('Quantile curves for age vs mAC') +
guides(fill = guide_legend(order=1, title.theme = element_text(size = 12)),
linetype = guide_legend(order=2,title.theme = element_text(size = 12))) +
theme(plot.title = element_text(vjust = 4),
plot.margin = unit(c(1,0.1,0.1,0.1), "cm"))
ggsave('plots/mAC_Age_Pubertal.png', p1, width = 12, height = 5.5)
mAC_pps_lines <- age_mAC_cleaned %>%
group_by(Sex, PPS_Score_Combined) %>%
summarise(`5` = quantile(mAC)[[1]],
`25` = quantile(mAC)[[2]],
`50` = quantile(mAC)[[3]],
`75` = quantile(mAC)[[4]],
`95` = quantile(mAC)[[5]],
)
mAC_pps_lines <- mAC_pps_lines %>%
tidyr::gather(3:7, key = Percentile, value = mAC) %>%
mutate(Percentile = factor(Percentile, ordered = T, levels = c(5,25,50,75,95)))
p2 <- ggplot(data = age_mAC_cleaned, aes(x = as.character(PPS_Score_Combined), y = mAC, fill = as.character(PPS_Score_Combined))) +
geom_violin(alpha = 0.5) +
geom_boxplot(width = 0.25, alpha = 0.7) +
geom_line(data = mAC_pps_lines, aes(x = PPS_Score_Combined, y = mAC, linetype = Percentile, group = Percentile), color = 'black') +
theme_minimal(base_size = 18) +
scale_fill_viridis(option = 'plasma', discrete = T) +
ggtitle('PPS vs mAC') +
facet_wrap(vars(Sex)) +
guides(fill = FALSE,
linetype = guide_legend(order=2,title.theme = element_text(size = 12))) +
xlab('PPS_Score_Combined')
ggsave('plots/mAC_Pubertal.png', p2, width = 12, height = 5.5)
#load in consensensus diagnosis data
diagnosis <- read_csv('pheno/Diagnosis_ClinicianConsensus.csv')
#create dummy var for ADHD_c
diagnosis$ADHD_c <- ifelse(apply(diagnosis, 1, function(x) any(x %in% "ADHD-Combined Type")), 1, 0)
#Pull vars and join with activity dataset
diagnosis <- diagnosis %>%
select(EID, ADHD_c) %>%
rename(ID = EID)
age_mAC_cleaned <- left_join(age_mAC_cleaned, diagnosis)
age_mAC_cleaned %>%
with(., table(Sex, ADHD_c)) %>%
as_data_frame() %>%
ggplot(aes(x = Sex, y = n, fill = ADHD_c, color = ADHD_c)) +
geom_col(alpha = 0.5, position = 'stack') +
#geom_density(linetype = 2, alpha = 0.5) +
theme_minimal(base_size = 14) +
scale_fill_manual(values = c("#0D0887FF", "#CC4678FF")) +
scale_color_manual(values = c("#0D0887FF", "#CC4678FF")) +
ggtitle('ADHD Combined Distribution (Sex)')
#ADHD_c by PPS
age_mAC_cleaned %>%
with(., table(Sex, PPS_Score_Combined, ADHD_c)) %>%
as_data_frame() %>%
ggplot(aes(x = PPS_Score_Combined, y = n, fill = ADHD_c, color = ADHD_c)) +
geom_col(alpha = 0.5, position = 'stack') +
#geom_density(linetype = 2, alpha = 0.5) +
theme_minimal(base_size = 14) +
scale_fill_manual(values = c("#0D0887FF", "#CC4678FF")) +
scale_color_manual(values = c("#0D0887FF", "#CC4678FF")) +
facet_wrap(~Sex, scales = 'free') +
ggtitle('ADHD Combined Distribution PPS')
NOTE: y-axis scale is free
age_mAC_cleaned$ADHD_c <- as.factor(age_mAC_cleaned$ADHD_c)
p3 <- ggplot(data = age_mAC_cleaned, aes(x = Age, y = mAC, fill = ADHD_c, color = ADHD_c)) +
geom_point(shape = 21, stroke = 0, alpha = 0.4, size = 3 ) +
#geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
geom_smooth(method = 'loess', size = 1.5, alpha = 0.1) +
theme_minimal(base_size = 18) +
scale_fill_manual(values = c("#0D0887FF", "#CC4678FF")) +
scale_color_manual(values = c("#0D0887FF", "#CC4678FF")) +
ggtitle('ADHD_c vs Age vs Sex') +
facet_wrap(~Sex)
ggsave('plots/mAC_Age_ADHD_c.png', p3, width = 12, height = 5.5)
mAC_pps_lines <- age_mAC_cleaned %>%
group_by(Sex, ADHD_c, PPS_Score_Combined) %>%
summarise(`5` = quantile(mAC)[[1]],
`25` = quantile(mAC)[[2]],
`50` = quantile(mAC)[[3]],
`75` = quantile(mAC)[[4]],
`95` = quantile(mAC)[[5]],
)
mAC_pps_lines <- mAC_pps_lines %>%
tidyr::gather(4:8, key = Percentile, value = mAC) %>%
mutate(Percentile = factor(Percentile, ordered = T, levels = c(5,25,50,75,95)))
p4 <- ggplot(data = age_mAC_cleaned, aes(x = as.character(PPS_Score_Combined), y = mAC, fill = ADHD_c)) +
geom_violin(alpha = 0.5) +
geom_boxplot(width = 0.25, alpha = 0.7) +
geom_line(data = mAC_pps_lines, aes(x = PPS_Score_Combined, y = mAC, linetype = Percentile, group = Percentile), color = 'black') +
theme_minimal(base_size = 18) +
scale_fill_manual(values = c("#0D0887FF", "#CC4678FF")) +
ggtitle('ADHD_c vs PPS vs mAC') +
facet_grid(Sex ~ ADHD_c) +
guides(linetype = guide_legend(order=2,title.theme = element_text(size = 12)),
fill = guide_legend(title.theme = element_text(size = 12))) +
xlab('PPS_Score_Combined')
ggsave('plots/mAC_PPS_ADHD_c.png', p4, width = 12, height = 8)
swan <- read_csv('pheno/SWAN.csv')
#Pull vars and join with activity dataset
swan <- swan %>%
select(EID, SWAN_IN, SWAN_HY, SWAN_Total) %>%
rename(ID = EID)
age_mAC_cleaned <- left_join(age_mAC_cleaned, swan)
#distribution by sex
ggplot(data = age_mAC_cleaned, aes(SWAN_HY, fill = Sex, color = Sex)) +
geom_histogram(aes(y=..density..), alpha = 0.5) +
geom_density(linetype = 2, alpha = 0.5) +
theme_minimal(base_size = 12) +
scale_fill_manual(values = c("#0D0887FF", "#7E03A8FF")) +
scale_color_manual(values = c("#0D0887FF", "#7E03A8FF")) +
ggtitle('SWAN_HY Distributions (Sex)') +
facet_wrap(~Sex)
p5 <- ggplot(data = age_mAC_cleaned, aes(x = Age, y = SWAN_HY, fill = Sex, color = Sex)) +
geom_point(shape = 21, stroke = 0, alpha = 0.4, size = 3 ) +
#geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
geom_smooth(method = 'loess', size = 1.5, alpha = 0.1) +
theme_minimal(base_size = 18) +
scale_fill_manual(values = c("#0D0887FF", "#CC4678FF")) +
scale_color_manual(values = c("#0D0887FF", "#CC4678FF")) +
ggtitle('SWAN_HY vs Age vs Sex') +
facet_wrap(~Sex)
ggsave('plots/Sex_Age_SWAN_HY.png', p5, width = 12, height = 5.5)
#bin into tertiles
my_ntiles <- function (x, n = 3, digits = 3)
{
x <- as.numeric(x)
xrank <- rank(x, na.last = TRUE, ties.method = "first")
xrank[is.na(x)] <- NA
size <- max(xrank, na.rm = TRUE)
if(size[[1]] < n + 1) {res <- rep(NA, length(x))}
else{
cts <- floor(seq(0, size, length.out = (n + 1)))
bin <- as.numeric(cut(xrank, breaks = cts))
left <- mosaic::min(x ~ bin, na.rm = TRUE)
right <- mosaic::max(x ~ bin, na.rm = TRUE)
res <- factor(bin, labels = paste0("Q", mosaic::max(bin ~ bin, na.rm = T)," [", round(left, digits = digits), " to ", round(right, digits = digits), "]"), ordered = TRUE)
}
return(res)
}
age_mAC_cleaned <- age_mAC_cleaned %>% mutate(SWAN_HY_q = my_ntiles(SWAN_HY, 3, 2))
table(age_mAC_cleaned$SWAN_HY_q, age_mAC_cleaned$Sex)
##
## Male Female
## Q1 [-3 to 0] 91 80
## Q2 [0 to 0.67] 113 58
## Q3 [0.67 to 3] 122 50
p6 <- ggplot(data = filter(age_mAC_cleaned, !is.na(SWAN_HY_q)), aes(x = Age, y = mAC, fill = SWAN_HY_q, color = SWAN_HY_q)) +
geom_point(shape = 21, stroke = 0, alpha = 0.4, size = 3 ) +
#geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs")) +
geom_smooth(method = 'loess', size = 1.5, alpha = 0.1) +
theme_minimal(base_size = 18) +
scale_fill_manual(values = c("#0D0887FF", "#CC4678FF", "#FDB32FFF")) +
scale_color_manual(values = c("#0D0887FF", "#CC4678FF", "#FDB32FFF")) +
ggtitle('SWAN_HY vs Age vs Sex') +
facet_wrap(~Sex)
ggsave('plots/mAC_Age_SWAN_HY.png', p6, width = 12, height = 5)
mAC_pps_lines <- age_mAC_cleaned %>%
filter(!is.na(SWAN_HY_q)) %>%
group_by(Sex, SWAN_HY_q, PPS_Score_Combined) %>%
summarise(`5` = quantile(mAC)[[1]],
`25` = quantile(mAC)[[2]],
`50` = quantile(mAC)[[3]],
`75` = quantile(mAC)[[4]],
`95` = quantile(mAC)[[5]],
)
mAC_pps_lines <- mAC_pps_lines %>%
tidyr::gather(4:8, key = Percentile, value = mAC) %>%
mutate(Percentile = factor(Percentile, ordered = T, levels = c(5,25,50,75,95)))
p7 <- ggplot(data = filter(age_mAC_cleaned, !is.na(SWAN_HY_q)), aes(x = as.character(PPS_Score_Combined), y = mAC, fill = SWAN_HY_q)) +
geom_violin(alpha = 0.5) +
geom_boxplot(width = 0.25, alpha = 0.7) +
geom_line(data = mAC_pps_lines, aes(x = PPS_Score_Combined, y = mAC, linetype = Percentile, group = Percentile), color = 'black') +
theme_minimal(base_size = 18) +
scale_fill_manual(values = c("#0D0887FF", "#CC4678FF", "#FDB32FFF")) +
ggtitle('SWAN_HY vs PPS vs mAC') +
facet_grid(Sex ~ SWAN_HY_q) +
guides(linetype = guide_legend(order=2,title.theme = element_text(size = 12)),
fill = guide_legend(title.theme = element_text(size = 12))) +
xlab('PPS_Score_Combined')
ggsave('plots/mAC_PPS_SWAN_HY.png', p7, width = 12, height = 6)
Don’t know how to make these, learn about the models used to generate these coefficient surfaces.
#model 1
#males
age_mAC_male <- age_mAC_cleaned %>%
filter(Sex == 'Male') %>%
mutate(PPS_M_Score = as.numeric(PPS_M_Score))
#mAC by age
m1 <- lm(mAC ~ Age, data = age_mAC_male)
#mAC by pps
m2 <- lm(mAC ~ PPS_M_Score, data = age_mAC_male)
#both
m3 <- lm(mAC ~ Age + PPS_M_Score, data = age_mAC_male)
vif(m3)
## Age PPS_M_Score
## 2.998889 2.998889
#Age PPS interaction
m4 <- lm(mAC ~ Age*PPS_M_Score, data = age_mAC_male)
vif(m4)
## Age PPS_M_Score Age:PPS_M_Score
## 6.370316 17.161635 29.817177
#females
age_mAC_female <- age_mAC_cleaned %>%
filter(Sex == 'Female') %>%
mutate(PPS_F_Score = as.numeric(PPS_F_Score))
#mAC by age
f1 <- lm(mAC ~ Age, data = age_mAC_female)
#mAC by pps
f2 <- lm(mAC ~ PPS_F_Score, data = age_mAC_female)
#both
f3 <- lm(mAC ~ Age + PPS_F_Score, data = age_mAC_female)
vif(f3)
## Age PPS_F_Score
## 3.556056 3.556056
#Age PPS interaction
f4 <- lm(mAC ~ Age*PPS_F_Score, data = age_mAC_female)
vif(f4)
## Age PPS_F_Score Age:PPS_F_Score
## 10.40104 13.63360 32.00549
tab_model(m1, m2, m3, m4, collapse.ci = T)
| mAC | mAC | mAC | mAC | |||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | p | Estimates | p | Estimates | p | Estimates | p |
| (Intercept) |
102.91 (97.16 – 108.66) |
<0.001 |
80.93 (77.23 – 84.64) |
<0.001 |
101.28 (94.95 – 107.60) |
<0.001 |
111.31 (98.94 – 123.68) |
<0.001 |
| Age |
-3.95 (-4.48 – -3.43) |
<0.001 |
-3.49 (-4.41 – -2.58) |
<0.001 |
-4.40 (-5.73 – -3.08) |
<0.001 | ||
| PPS_M_Score |
-10.21 (-11.92 – -8.50) |
<0.001 |
-1.68 (-4.41 – 1.05) |
0.226 |
-7.26 (-13.77 – -0.75) |
0.029 | ||
| Age * PPS_M_Score |
0.44 (-0.03 – 0.90) |
0.064 | ||||||
| Observations | 328 | 328 | 328 | 328 | ||||
| R2 / R2 adjusted | 0.400 / 0.398 | 0.298 / 0.296 | 0.402 / 0.399 | 0.409 / 0.403 | ||||
tab_model(f1, f2, f3, f4, collapse.ci = T)
| mAC | mAC | mAC | mAC | |||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | p | Estimates | p | Estimates | p | Estimates | p |
| (Intercept) |
83.88 (78.50 – 89.26) |
<0.001 |
69.75 (65.80 – 73.71) |
<0.001 |
83.44 (77.63 – 89.25) |
<0.001 |
83.98 (71.08 – 96.88) |
<0.001 |
| Age |
-3.01 (-3.51 – -2.51) |
<0.001 |
-2.85 (-3.79 – -1.91) |
<0.001 |
-2.91 (-4.53 – -1.29) |
<0.001 | ||
| PPS_F_Score |
-6.78 (-8.19 – -5.37) |
<0.001 |
-0.50 (-2.95 – 1.95) |
0.688 |
-0.69 (-5.50 – 4.12) |
0.777 | ||
| Age * PPS_F_Score |
0.02 (-0.39 – 0.43) |
0.927 | ||||||
| Observations | 190 | 190 | 190 | 190 | ||||
| R2 / R2 adjusted | 0.430 / 0.427 | 0.322 / 0.319 | 0.431 / 0.425 | 0.431 / 0.422 | ||||
#model 1
#males
age_mAC_male <- age_mAC_cleaned %>%
filter(Sex == 'Male', Age > 10) %>%
mutate(PPS_M_Score = as.numeric(PPS_M_Score))
#mAC by age
m1 <- lm(mAC ~ Age, data = age_mAC_male)
#mAC by pps
m2 <- lm(mAC ~ PPS_M_Score, data = age_mAC_male)
#both
m3 <- lm(mAC ~ Age + PPS_M_Score, data = age_mAC_male)
vif(m3)
## Age PPS_M_Score
## 2.232727 2.232727
#Age PPS interaction
m4 <- lm(mAC ~ Age*PPS_M_Score, data = age_mAC_male)
vif(m4)
## Age PPS_M_Score Age:PPS_M_Score
## 10.32328 23.11808 49.29361
#females
age_mAC_female <- age_mAC_cleaned %>%
filter(Sex == 'Female', Age > 10) %>%
mutate(PPS_F_Score = as.numeric(PPS_F_Score))
#mAC by age
f1 <- lm(mAC ~ Age, data = age_mAC_female)
#mAC by pps
f2 <- lm(mAC ~ PPS_F_Score, data = age_mAC_female)
#both
f3 <- lm(mAC ~ Age + PPS_F_Score, data = age_mAC_female)
vif(f3)
## Age PPS_F_Score
## 2.188281 2.188281
#Age PPS interaction
f4 <- lm(mAC ~ Age*PPS_F_Score, data = age_mAC_female)
vif(f4)
## Age PPS_F_Score Age:PPS_F_Score
## 21.36514 34.42512 89.05771
tab_model(m1, m2, m3, m4, collapse.ci = T)
| mAC | mAC | mAC | mAC | |||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | p | Estimates | p | Estimates | p | Estimates | p |
| (Intercept) |
87.96 (76.23 – 99.70) |
<0.001 |
65.06 (59.25 – 70.87) |
<0.001 |
83.61 (70.94 – 96.27) |
<0.001 |
97.76 (65.47 – 130.05) |
<0.001 |
| Age |
-2.95 (-3.81 – -2.08) |
<0.001 |
-2.11 (-3.39 – -0.82) |
0.002 |
-3.27 (-6.04 – -0.50) |
0.021 | ||
| PPS_M_Score |
-6.08 (-8.08 – -4.07) |
<0.001 |
-2.55 (-5.45 – 0.35) |
0.085 |
-6.79 (-16.14 – 2.56) |
0.154 | ||
| Age * PPS_M_Score |
0.34 (-0.37 – 1.04) |
0.348 | ||||||
| Observations | 151 | 151 | 151 | 151 | ||||
| R2 / R2 adjusted | 0.232 / 0.227 | 0.194 / 0.189 | 0.247 / 0.237 | 0.252 / 0.237 | ||||
tab_model(f1, f2, f3, f4, collapse.ci = T)
| mAC | mAC | mAC | mAC | |||||
|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | p | Estimates | p | Estimates | p | Estimates | p |
| (Intercept) |
65.33 (52.65 – 78.01) |
<0.001 |
52.99 (45.38 – 60.60) |
<0.001 |
63.92 (50.85 – 76.99) |
<0.001 |
77.82 (28.88 – 126.77) |
0.002 |
| Age |
-1.82 (-2.73 – -0.91) |
<0.001 |
-1.37 (-2.72 – -0.02) |
0.046 |
-2.55 (-6.77 – 1.67) |
0.233 | ||
| PPS_F_Score |
-3.55 (-5.59 – -1.52) |
0.001 |
-1.34 (-4.29 – 1.61) |
0.370 |
-4.69 (-16.45 – 7.07) |
0.429 | ||
| Age * PPS_F_Score |
0.28 (-0.67 – 1.23) |
0.559 | ||||||
| Observations | 79 | 79 | 79 | 79 | ||||
| R2 / R2 adjusted | 0.171 / 0.160 | 0.136 / 0.124 | 0.180 / 0.158 | 0.184 / 0.151 | ||||