there are NO multiple testing corrections on any of these results
model each peptide z score independently with predictors step count, age, sex, bmi, and presence/absence of symptomatic knee OA
1 peptide in which steps is a significant predictor: nalwhtgdtesqvr (COMP)
If we run the models without bmi and OA status covariates, results are the same. GAMs (rather than LMs) give same results
covariates <- c("age", "sex", "bmi", "symptomatic_OA")
peptide_zs <- c("aeieylek_z", "afpaltsldlsdnpglger_z", "diaptltlyvgk_z", "dspvlidffedter_z", "efnplvivglsk_z", "eylgaicsctcfggqr_z", "fagvfhvek_z", "fceigsddcyvgdgysyr_z", "fgsslitvr_z", "ftvdrpflfliyehr_z", "fvfgttpedilr_z", "ggegtgyfvdfsvr_z", "gvaslfagr_z", "gygfytk_z", "hqpqefptyveptndeiceafr_z", "htlnqidevk_z", "ialggllfpasnlr_z", "icldlqaplyk_z", "iihfgtr_z", "itevwgipspidtvftr_z", "laipegk_z", "lcqdlgpgafr_z", "ldddlehqgghvldhghk_z", "livhngycdgr_z", "llevpegr_z", "lqaeafqar_z", "lvllnaiylsak_z", "lvngqshislsk_z", "nalwhtgdtesqvr_z", "qpqfisr_z", "scesnspfpvhpgtaectk_z", "sqcledhtwappfpick_z", "sspyyalr_z", "tgyyfdgisr_z", "vapeehpvllteaplnpk_z", "vistleeptpqcptsqgr_z", "vleptlk_z", "vvnptll_z", "yycfqgnqflr_z")
# pa variables: steps = ad_tot_step_count_0_24hr, light mins = dur_day_total_lig_min_pla, mod mins = dur_day_total_mod_min_pla, vig mins = dur_day_total_vig_min_pla, light and mod = lig_mod_pooled_mins, mod and vig = mod_vig_pooled_mins
# lifestyle variables: urb_score, market_diet_index
results_list <- list()
for (var in peptide_zs) {
# peptide ~ PA (is PA a significant predictor of peptide?)
f <- as.formula(paste(var, "~ ad_tot_step_count_0_24hr", "+", paste(covariates, collapse = " + ")))
# linear regression
model <- lm(f, data = dat_clean)
# for gams
# model <- gam(f, data = dat_clean)
# coefficients from lm
coef_summary <- summary(model)$coefficients
est <- coef(model)[2]
se <- coef_summary[2, "Std. Error"]
pval <- coef_summary[2, "Pr(>|t|)"]
# # coefficients from gam
# est <- summary(model)$p.coeff[2]
# se <- summary(model)$se[2]
# pval <- summary(model)$p.pv[2]
# effect size and CI
effect_size <- est
ci_low <- (est - 1.96 * se)
ci_high <- (est + 1.96 * se)
# append
results_list[[var]] <- data.frame(
variable = var,
effect_size = effect_size,
ci_low = ci_low,
ci_high = ci_high,
p_value = pval
)
}
# results df (all peptides)
results_df <- do.call(rbind, results_list)
# order by p-value (lowest to highest)
results_df$variable <- factor(results_df$variable, levels = results_df$variable[order(1-results_df$p_value)])
# forest plot
ggplot(results_df, aes(x = variable, y = effect_size)) +
geom_point() +
geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.2) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
#geom_text(aes(label = sprintf("p = %.3f", p_value)), hjust = -4, size = 3) +
coord_flip() +
labs(
title = "Forest Plot of Average Daily Step Count Effect Sizes",
y = "Estimate (95% CI)",
x = "Peptide"
) +
theme_pomol_leg()
model each peptide z score independently with predictors mvpa, age, sex, bmi, and presence/absence of symptomatic knee OA
1 peptide in which mvpa is a significant predictor: nalwhtgdtesqvr (COMP)
If we run the models without bmi and OA status covariates, results are the same. GAMs (rather than LMs) give same results
covariates <- c("age", "sex", "bmi", "symptomatic_OA")
peptide_zs <- c("aeieylek_z", "afpaltsldlsdnpglger_z", "diaptltlyvgk_z", "dspvlidffedter_z", "efnplvivglsk_z", "eylgaicsctcfggqr_z", "fagvfhvek_z", "fceigsddcyvgdgysyr_z", "fgsslitvr_z", "ftvdrpflfliyehr_z", "fvfgttpedilr_z", "ggegtgyfvdfsvr_z", "gvaslfagr_z", "gygfytk_z", "hqpqefptyveptndeiceafr_z", "htlnqidevk_z", "ialggllfpasnlr_z", "icldlqaplyk_z", "iihfgtr_z", "itevwgipspidtvftr_z", "laipegk_z", "lcqdlgpgafr_z", "ldddlehqgghvldhghk_z", "livhngycdgr_z", "llevpegr_z", "lqaeafqar_z", "lvllnaiylsak_z", "lvngqshislsk_z", "nalwhtgdtesqvr_z", "qpqfisr_z", "scesnspfpvhpgtaectk_z", "sqcledhtwappfpick_z", "sspyyalr_z", "tgyyfdgisr_z", "vapeehpvllteaplnpk_z", "vistleeptpqcptsqgr_z", "vleptlk_z", "vvnptll_z", "yycfqgnqflr_z")
# pa variables: steps = ad_tot_step_count_0_24hr, light mins = dur_day_total_lig_min_pla, mod mins = dur_day_total_mod_min_pla, vig mins = dur_day_total_vig_min_pla, light and mod = lig_mod_pooled_mins, mod and vig = mod_vig_pooled_mins
# lifestyle variables: urb_score, market_diet_index
results_list <- list()
for (var in peptide_zs) {
# peptide ~ PA (is PA a significant predictor of peptide?)
f <- as.formula(paste(var, "~ mod_vig_pooled_mins", "+", paste(covariates, collapse = " + ")))
# linear regression
model <- lm(f, data = dat_clean)
# for gams
# model <- gam(f, data = dat_clean)
# coefficients from lm
coef_summary <- summary(model)$coefficients
est <- coef(model)[2]
se <- coef_summary[2, "Std. Error"]
pval <- coef_summary[2, "Pr(>|t|)"]
# coefficients from gam
# est <- summary(model)$p.coeff[2]
# se <- summary(model)$se[2]
# pval <- summary(model)$p.pv[2]
# effect size and CI
effect_size <- est
ci_low <- (est - 1.96 * se)
ci_high <- (est + 1.96 * se)
# append
results_list[[var]] <- data.frame(
variable = var,
effect_size = effect_size,
ci_low = ci_low,
ci_high = ci_high,
p_value = pval
)
}
# results df (all peptides)
results_df <- do.call(rbind, results_list)
# order by p-value (lowest to highest)
results_df$variable <- factor(results_df$variable, levels = results_df$variable[order(1-results_df$p_value)])
# forest plot
ggplot(results_df, aes(x = variable, y = effect_size)) +
geom_point() +
geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.2) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
#geom_text(aes(label = sprintf("p = %.3f", p_value)), hjust = -4, size = 3) +
coord_flip() +
labs(
title = "Forest Plot of MVPA Effect Sizes",
y = "Estimate (95% CI)",
x = "Peptide"
) +
theme_pomol_leg()
model each peptide z score independently with urbanicity score predictor. results vary depending the adjustment set of covariates included. forest plot here shows results with no covariate adjustment.
6 peptides in which urbanicity is a significant predictor if no adjustment for covariates: itevwgipspidtvftr (PRG4), iihfgtr (CO5), aeieylek (LYAM1), vistleeptpqcptsqgr (DOPO), icldlqaplyk (PLF4), vleptlk (VTDB)
3 peptides in which urbanicity is a significant (positive) predictor if bmi is not included in adjustment set (control for age and sex only): itevwgipspidtvftr (PRG4), iihfgtr (CO5), livhngycdgr (RET4)
3 peptides in which urbanicity is a significant predictor if adjustment set includes bmi (control for age, sex, bmi). Controlling for OA status does not alter these results: icldlqaplyk (PLF4), iihfgtr (CO5), fvfgttpedilr (TSP1)
GAMs rather than LMs do not alter results
# covariates <- c("age", "sex", "bmi", "symptomatic_OA")
peptide_zs <- c("aeieylek_z", "afpaltsldlsdnpglger_z", "diaptltlyvgk_z", "dspvlidffedter_z", "efnplvivglsk_z", "eylgaicsctcfggqr_z", "fagvfhvek_z", "fceigsddcyvgdgysyr_z", "fgsslitvr_z", "ftvdrpflfliyehr_z", "fvfgttpedilr_z", "ggegtgyfvdfsvr_z", "gvaslfagr_z", "gygfytk_z", "hqpqefptyveptndeiceafr_z", "htlnqidevk_z", "ialggllfpasnlr_z", "icldlqaplyk_z", "iihfgtr_z", "itevwgipspidtvftr_z", "laipegk_z", "lcqdlgpgafr_z", "ldddlehqgghvldhghk_z", "livhngycdgr_z", "llevpegr_z", "lqaeafqar_z", "lvllnaiylsak_z", "lvngqshislsk_z", "nalwhtgdtesqvr_z", "qpqfisr_z", "scesnspfpvhpgtaectk_z", "sqcledhtwappfpick_z", "sspyyalr_z", "tgyyfdgisr_z", "vapeehpvllteaplnpk_z", "vistleeptpqcptsqgr_z", "vleptlk_z", "vvnptll_z", "yycfqgnqflr_z")
# pa variables: steps = ad_tot_step_count_0_24hr, light mins = dur_day_total_lig_min_pla, mod mins = dur_day_total_mod_min_pla, vig mins = dur_day_total_vig_min_pla, light and mod = lig_mod_pooled_mins, mod and vig = mod_vig_pooled_mins
# lifestyle variables: urb_score, market_diet_index
results_list <- list()
for (var in peptide_zs) {
# peptide ~ urb (is urb a significant predictor of peptide?)
# f <- as.formula(paste(var, "~ urb_score", "+", paste(covariates, collapse = " + ")))
f <- as.formula(paste(var, "~ urb_score"))
# linear regression
model <- lm(f, data = dat_clean)
# for gams
# model <- gam(f, data = dat_clean)
# coefficients from lm
coef_summary <- summary(model)$coefficients
est <- coef(model)[2]
se <- coef_summary[2, "Std. Error"]
pval <- coef_summary[2, "Pr(>|t|)"]
# coefficients from gam
# est <- summary(model)$p.coeff[2]
# se <- summary(model)$se[2]
# pval <- summary(model)$p.pv[2]
# effect size and CI
effect_size <- est
ci_low <- (est - 1.96 * se)
ci_high <- (est + 1.96 * se)
# append
results_list[[var]] <- data.frame(
variable = var,
effect_size = effect_size,
ci_low = ci_low,
ci_high = ci_high,
p_value = pval
)
}
# results df (all peptides)
results_df <- do.call(rbind, results_list)
# order by p-value (lowest to highest)
results_df$variable <- factor(results_df$variable, levels = results_df$variable[order(1-results_df$p_value)])
# forest plot
ggplot(results_df, aes(x = variable, y = effect_size)) +
geom_point() +
geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.2) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
#geom_text(aes(label = sprintf("p = %.3f", p_value)), hjust = -4, size = 3) +
coord_flip() +
labs(
title = "Forest Plot of Urbanicity Effect Sizes",
y = "Estimate (95% CI)",
x = "Peptide"
) +
theme_pomol_leg()
model each peptide z score independently with market diet index predictor. results vary depending the adjustment set of covariates included. forest plot here shows results with no covariate adjustment.
4 peptides in which urbanicity is a significant predictor if no adjustment for covariates: iihfgtr (CO5), lvllnaiylsak_z (IC1), aeieylek (LYAM1), tgyyfdgisr_z (FBLN1)
4 peptides in which urbanicity is a significant (positive) predictor if bmi is not included in adjustment set (control for age and sex only): iihfgtr (CO5), tgyyfdgisr_z (FBLN1), lvllnaiylsak_z (IC1), aeieylek (LYAM1)
3 peptides in which urbanicity is a significant predictor if adjustment set includes bmi (control for age, sex, bmi). Controlling for OA status does not alter these results: lvllnaiylsak_z (IC1), iihfgtr (CO5), tgyyfdgisr_z (FBLN1)
# covariates <- c("age", "sex", "bmi")
peptide_zs <- c("aeieylek_z", "afpaltsldlsdnpglger_z", "diaptltlyvgk_z", "dspvlidffedter_z", "efnplvivglsk_z", "eylgaicsctcfggqr_z", "fagvfhvek_z", "fceigsddcyvgdgysyr_z", "fgsslitvr_z", "ftvdrpflfliyehr_z", "fvfgttpedilr_z", "ggegtgyfvdfsvr_z", "gvaslfagr_z", "gygfytk_z", "hqpqefptyveptndeiceafr_z", "htlnqidevk_z", "ialggllfpasnlr_z", "icldlqaplyk_z", "iihfgtr_z", "itevwgipspidtvftr_z", "laipegk_z", "lcqdlgpgafr_z", "ldddlehqgghvldhghk_z", "livhngycdgr_z", "llevpegr_z", "lqaeafqar_z", "lvllnaiylsak_z", "lvngqshislsk_z", "nalwhtgdtesqvr_z", "qpqfisr_z", "scesnspfpvhpgtaectk_z", "sqcledhtwappfpick_z", "sspyyalr_z", "tgyyfdgisr_z", "vapeehpvllteaplnpk_z", "vistleeptpqcptsqgr_z", "vleptlk_z", "vvnptll_z", "yycfqgnqflr_z")
# pa variables: steps = ad_tot_step_count_0_24hr, light mins = dur_day_total_lig_min_pla, mod mins = dur_day_total_mod_min_pla, vig mins = dur_day_total_vig_min_pla, light and mod = lig_mod_pooled_mins, mod and vig = mod_vig_pooled_mins
# lifestyle variables: urb_score, market_diet_index
results_list <- list()
for (var in peptide_zs) {
# peptide ~ urb (is urb a significant predictor of peptide?)
# f <- as.formula(paste(var, "~ market_diet_index", "+", paste(covariates, collapse = " + ")))
f <- as.formula(paste(var, "~ market_diet_index"))
# linear regression
model <- lm(f, data = dat_clean)
# for gams
# model <- gam(f, data = dat_clean)
# coefficients from lm
coef_summary <- summary(model)$coefficients
est <- coef(model)[2]
se <- coef_summary[2, "Std. Error"]
pval <- coef_summary[2, "Pr(>|t|)"]
# coefficients from gam
# est <- summary(model)$p.coeff[2]
# se <- summary(model)$se[2]
# pval <- summary(model)$p.pv[2]
# effect size and CI
effect_size <- est
ci_low <- (est - 1.96 * se)
ci_high <- (est + 1.96 * se)
# append
results_list[[var]] <- data.frame(
variable = var,
effect_size = effect_size,
ci_low = ci_low,
ci_high = ci_high,
p_value = pval
)
}
# results df (all peptides)
results_df <- do.call(rbind, results_list)
# order by p-value (lowest to highest)
results_df$variable <- factor(results_df$variable, levels = results_df$variable[order(1-results_df$p_value)])
# forest plot
ggplot(results_df, aes(x = variable, y = effect_size)) +
geom_point() +
geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.2) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
#geom_text(aes(label = sprintf("p = %.3f", p_value)), hjust = -4, size = 3) +
coord_flip() +
labs(
title = "Forest Plot of Market Diet Index Effect Sizes",
y = "Estimate (95% CI)",
x = "Peptide"
) +
theme_pomol_leg()