there are NO multiple testing corrections on any of these results

does average daily step count predict OA peptide z scores?

model each peptide z score independently with predictors step count, age, sex, bmi, and presence/absence of symptomatic knee OA

1 peptide in which steps is a significant predictor: nalwhtgdtesqvr (COMP)

If we run the models without bmi and OA status covariates, results are the same. GAMs (rather than LMs) give same results

covariates <- c("age", "sex", "bmi", "symptomatic_OA")  

peptide_zs <- c("aeieylek_z",  "afpaltsldlsdnpglger_z", "diaptltlyvgk_z", "dspvlidffedter_z", "efnplvivglsk_z", "eylgaicsctcfggqr_z", "fagvfhvek_z", "fceigsddcyvgdgysyr_z", "fgsslitvr_z", "ftvdrpflfliyehr_z", "fvfgttpedilr_z", "ggegtgyfvdfsvr_z", "gvaslfagr_z", "gygfytk_z", "hqpqefptyveptndeiceafr_z", "htlnqidevk_z", "ialggllfpasnlr_z", "icldlqaplyk_z", "iihfgtr_z", "itevwgipspidtvftr_z", "laipegk_z", "lcqdlgpgafr_z", "ldddlehqgghvldhghk_z", "livhngycdgr_z", "llevpegr_z", "lqaeafqar_z", "lvllnaiylsak_z", "lvngqshislsk_z", "nalwhtgdtesqvr_z", "qpqfisr_z", "scesnspfpvhpgtaectk_z", "sqcledhtwappfpick_z", "sspyyalr_z", "tgyyfdgisr_z", "vapeehpvllteaplnpk_z", "vistleeptpqcptsqgr_z", "vleptlk_z", "vvnptll_z", "yycfqgnqflr_z")

# pa variables: steps = ad_tot_step_count_0_24hr, light mins = dur_day_total_lig_min_pla, mod mins = dur_day_total_mod_min_pla, vig mins = dur_day_total_vig_min_pla, light and mod = lig_mod_pooled_mins, mod and vig = mod_vig_pooled_mins

# lifestyle variables: urb_score, market_diet_index


results_list <- list()

for (var in peptide_zs) {
  # peptide ~ PA (is PA a significant predictor of peptide?)
  f <- as.formula(paste(var, "~ ad_tot_step_count_0_24hr", "+", paste(covariates, collapse = " + ")))
  
  # linear regression
  model <- lm(f, data = dat_clean)
  
  # for gams
  # model <- gam(f, data = dat_clean)
  
  # coefficients from lm
  coef_summary <- summary(model)$coefficients
  est <- coef(model)[2]
  se <- coef_summary[2, "Std. Error"]
  pval <- coef_summary[2, "Pr(>|t|)"]

  # # coefficients from gam
  #  est <- summary(model)$p.coeff[2]
  #  se <- summary(model)$se[2]
  #  pval <- summary(model)$p.pv[2]

  # effect size and CI
  effect_size <- est
  ci_low <- (est - 1.96 * se)
  ci_high <- (est + 1.96 * se)
  
  # append
  results_list[[var]] <- data.frame(
    variable = var,
    effect_size = effect_size,
    ci_low = ci_low,
    ci_high = ci_high,
    p_value = pval
  )
}

# results df (all peptides)
results_df <- do.call(rbind, results_list)
# order by p-value (lowest to highest)
results_df$variable <- factor(results_df$variable, levels = results_df$variable[order(1-results_df$p_value)])

# forest plot
ggplot(results_df, aes(x = variable, y = effect_size)) +
  geom_point() +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  #geom_text(aes(label = sprintf("p = %.3f", p_value)), hjust = -4, size = 3) +
  coord_flip() +
  labs(
    title = "Forest Plot of Average Daily Step Count Effect Sizes",
    y = "Estimate (95% CI)",
    x = "Peptide"
  ) +
  theme_pomol_leg()

does average daily minutes of moderate and vigorous intensity physical activity predict OA peptide z scores?

model each peptide z score independently with predictors mvpa, age, sex, bmi, and presence/absence of symptomatic knee OA

1 peptide in which mvpa is a significant predictor: nalwhtgdtesqvr (COMP)

If we run the models without bmi and OA status covariates, results are the same. GAMs (rather than LMs) give same results

covariates <- c("age", "sex", "bmi", "symptomatic_OA")  

peptide_zs <- c("aeieylek_z",  "afpaltsldlsdnpglger_z", "diaptltlyvgk_z", "dspvlidffedter_z", "efnplvivglsk_z", "eylgaicsctcfggqr_z", "fagvfhvek_z", "fceigsddcyvgdgysyr_z", "fgsslitvr_z", "ftvdrpflfliyehr_z", "fvfgttpedilr_z", "ggegtgyfvdfsvr_z", "gvaslfagr_z", "gygfytk_z", "hqpqefptyveptndeiceafr_z", "htlnqidevk_z", "ialggllfpasnlr_z", "icldlqaplyk_z", "iihfgtr_z", "itevwgipspidtvftr_z", "laipegk_z", "lcqdlgpgafr_z", "ldddlehqgghvldhghk_z", "livhngycdgr_z", "llevpegr_z", "lqaeafqar_z", "lvllnaiylsak_z", "lvngqshislsk_z", "nalwhtgdtesqvr_z", "qpqfisr_z", "scesnspfpvhpgtaectk_z", "sqcledhtwappfpick_z", "sspyyalr_z", "tgyyfdgisr_z", "vapeehpvllteaplnpk_z", "vistleeptpqcptsqgr_z", "vleptlk_z", "vvnptll_z", "yycfqgnqflr_z")

# pa variables: steps = ad_tot_step_count_0_24hr, light mins = dur_day_total_lig_min_pla, mod mins = dur_day_total_mod_min_pla, vig mins = dur_day_total_vig_min_pla, light and mod = lig_mod_pooled_mins, mod and vig = mod_vig_pooled_mins

# lifestyle variables: urb_score, market_diet_index

results_list <- list()

for (var in peptide_zs) {
  # peptide ~ PA (is PA a significant predictor of peptide?)
  f <- as.formula(paste(var, "~ mod_vig_pooled_mins", "+", paste(covariates, collapse = " + ")))
  
  # linear regression
  model <- lm(f, data = dat_clean)
  
  # for gams
  # model <- gam(f, data = dat_clean)
  
  # coefficients from lm
  coef_summary <- summary(model)$coefficients
  est <- coef(model)[2]
  se <- coef_summary[2, "Std. Error"]
  pval <- coef_summary[2, "Pr(>|t|)"]
  
  # coefficients from gam
  # est <- summary(model)$p.coeff[2]
  # se <- summary(model)$se[2]
  # pval <- summary(model)$p.pv[2]
  
  # effect size and CI
  effect_size <- est
  ci_low <- (est - 1.96 * se)
  ci_high <- (est + 1.96 * se)
  
  # append
  results_list[[var]] <- data.frame(
    variable = var,
    effect_size = effect_size,
    ci_low = ci_low,
    ci_high = ci_high,
    p_value = pval
  )
}

# results df (all peptides)
results_df <- do.call(rbind, results_list)
# order by p-value (lowest to highest)
results_df$variable <- factor(results_df$variable, levels = results_df$variable[order(1-results_df$p_value)])

# forest plot
ggplot(results_df, aes(x = variable, y = effect_size)) +
  geom_point() +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  #geom_text(aes(label = sprintf("p = %.3f", p_value)), hjust = -4, size = 3) +
  coord_flip() +
  labs(
    title = "Forest Plot of MVPA Effect Sizes",
    y = "Estimate (95% CI)",
    x = "Peptide"
  ) +
  theme_pomol_leg()

does urbanicity index predict OA peptide z scores?

model each peptide z score independently with urbanicity score predictor. results vary depending the adjustment set of covariates included. forest plot here shows results with no covariate adjustment.

6 peptides in which urbanicity is a significant predictor if no adjustment for covariates: itevwgipspidtvftr (PRG4), iihfgtr (CO5), aeieylek (LYAM1), vistleeptpqcptsqgr (DOPO), icldlqaplyk (PLF4), vleptlk (VTDB)

3 peptides in which urbanicity is a significant (positive) predictor if bmi is not included in adjustment set (control for age and sex only): itevwgipspidtvftr (PRG4), iihfgtr (CO5), livhngycdgr (RET4)

3 peptides in which urbanicity is a significant predictor if adjustment set includes bmi (control for age, sex, bmi). Controlling for OA status does not alter these results: icldlqaplyk (PLF4), iihfgtr (CO5), fvfgttpedilr (TSP1)

GAMs rather than LMs do not alter results

# covariates <- c("age", "sex", "bmi", "symptomatic_OA")  

peptide_zs <- c("aeieylek_z",  "afpaltsldlsdnpglger_z", "diaptltlyvgk_z", "dspvlidffedter_z", "efnplvivglsk_z", "eylgaicsctcfggqr_z", "fagvfhvek_z", "fceigsddcyvgdgysyr_z", "fgsslitvr_z", "ftvdrpflfliyehr_z", "fvfgttpedilr_z", "ggegtgyfvdfsvr_z", "gvaslfagr_z", "gygfytk_z", "hqpqefptyveptndeiceafr_z", "htlnqidevk_z", "ialggllfpasnlr_z", "icldlqaplyk_z", "iihfgtr_z", "itevwgipspidtvftr_z", "laipegk_z", "lcqdlgpgafr_z", "ldddlehqgghvldhghk_z", "livhngycdgr_z", "llevpegr_z", "lqaeafqar_z", "lvllnaiylsak_z", "lvngqshislsk_z", "nalwhtgdtesqvr_z", "qpqfisr_z", "scesnspfpvhpgtaectk_z", "sqcledhtwappfpick_z", "sspyyalr_z", "tgyyfdgisr_z", "vapeehpvllteaplnpk_z", "vistleeptpqcptsqgr_z", "vleptlk_z", "vvnptll_z", "yycfqgnqflr_z")

# pa variables: steps = ad_tot_step_count_0_24hr, light mins = dur_day_total_lig_min_pla, mod mins = dur_day_total_mod_min_pla, vig mins = dur_day_total_vig_min_pla, light and mod = lig_mod_pooled_mins, mod and vig = mod_vig_pooled_mins

# lifestyle variables: urb_score, market_diet_index

results_list <- list()

for (var in peptide_zs) {
  # peptide ~ urb (is urb a significant predictor of peptide?)
  # f <- as.formula(paste(var, "~ urb_score", "+", paste(covariates, collapse = " + ")))
  f <- as.formula(paste(var, "~ urb_score"))
  
  # linear regression
  model <- lm(f, data = dat_clean)
  
  # for gams
  # model <- gam(f, data = dat_clean)
  
  # coefficients from lm
  coef_summary <- summary(model)$coefficients
  est <- coef(model)[2]
  se <- coef_summary[2, "Std. Error"]
  pval <- coef_summary[2, "Pr(>|t|)"]
  
  # coefficients from gam
  # est <- summary(model)$p.coeff[2]
  # se <- summary(model)$se[2]
  # pval <- summary(model)$p.pv[2]
  
  # effect size and CI
  effect_size <- est
  ci_low <- (est - 1.96 * se)
  ci_high <- (est + 1.96 * se)
  
  # append
  results_list[[var]] <- data.frame(
    variable = var,
    effect_size = effect_size,
    ci_low = ci_low,
    ci_high = ci_high,
    p_value = pval
  )
}

# results df (all peptides)
results_df <- do.call(rbind, results_list)
# order by p-value (lowest to highest)
results_df$variable <- factor(results_df$variable, levels = results_df$variable[order(1-results_df$p_value)])

# forest plot
ggplot(results_df, aes(x = variable, y = effect_size)) +
  geom_point() +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  #geom_text(aes(label = sprintf("p = %.3f", p_value)), hjust = -4, size = 3) +
  coord_flip() +
  labs(
    title = "Forest Plot of Urbanicity Effect Sizes",
    y = "Estimate (95% CI)",
    x = "Peptide"
  ) +
  theme_pomol_leg()

does market diet index predict OA peptide z scores?

model each peptide z score independently with market diet index predictor. results vary depending the adjustment set of covariates included. forest plot here shows results with no covariate adjustment.

4 peptides in which urbanicity is a significant predictor if no adjustment for covariates: iihfgtr (CO5), lvllnaiylsak_z (IC1), aeieylek (LYAM1), tgyyfdgisr_z (FBLN1)

4 peptides in which urbanicity is a significant (positive) predictor if bmi is not included in adjustment set (control for age and sex only): iihfgtr (CO5), tgyyfdgisr_z (FBLN1), lvllnaiylsak_z (IC1), aeieylek (LYAM1)

3 peptides in which urbanicity is a significant predictor if adjustment set includes bmi (control for age, sex, bmi). Controlling for OA status does not alter these results: lvllnaiylsak_z (IC1), iihfgtr (CO5), tgyyfdgisr_z (FBLN1)

# covariates <- c("age", "sex", "bmi")  

peptide_zs <- c("aeieylek_z",  "afpaltsldlsdnpglger_z", "diaptltlyvgk_z", "dspvlidffedter_z", "efnplvivglsk_z", "eylgaicsctcfggqr_z", "fagvfhvek_z", "fceigsddcyvgdgysyr_z", "fgsslitvr_z", "ftvdrpflfliyehr_z", "fvfgttpedilr_z", "ggegtgyfvdfsvr_z", "gvaslfagr_z", "gygfytk_z", "hqpqefptyveptndeiceafr_z", "htlnqidevk_z", "ialggllfpasnlr_z", "icldlqaplyk_z", "iihfgtr_z", "itevwgipspidtvftr_z", "laipegk_z", "lcqdlgpgafr_z", "ldddlehqgghvldhghk_z", "livhngycdgr_z", "llevpegr_z", "lqaeafqar_z", "lvllnaiylsak_z", "lvngqshislsk_z", "nalwhtgdtesqvr_z", "qpqfisr_z", "scesnspfpvhpgtaectk_z", "sqcledhtwappfpick_z", "sspyyalr_z", "tgyyfdgisr_z", "vapeehpvllteaplnpk_z", "vistleeptpqcptsqgr_z", "vleptlk_z", "vvnptll_z", "yycfqgnqflr_z")

# pa variables: steps = ad_tot_step_count_0_24hr, light mins = dur_day_total_lig_min_pla, mod mins = dur_day_total_mod_min_pla, vig mins = dur_day_total_vig_min_pla, light and mod = lig_mod_pooled_mins, mod and vig = mod_vig_pooled_mins

# lifestyle variables: urb_score, market_diet_index

results_list <- list()

for (var in peptide_zs) {
  # peptide ~ urb (is urb a significant predictor of peptide?)
  # f <- as.formula(paste(var, "~ market_diet_index", "+", paste(covariates, collapse = " + ")))
  f <- as.formula(paste(var, "~ market_diet_index"))

  # linear regression
  model <- lm(f, data = dat_clean)
  
  # for gams
  # model <- gam(f, data = dat_clean)
  
  # coefficients from lm
  coef_summary <- summary(model)$coefficients
  est <- coef(model)[2]
  se <- coef_summary[2, "Std. Error"]
  pval <- coef_summary[2, "Pr(>|t|)"]
  
  # coefficients from gam
  # est <- summary(model)$p.coeff[2]
  # se <- summary(model)$se[2]
  # pval <- summary(model)$p.pv[2]
  
  # effect size and CI
  effect_size <- est
  ci_low <- (est - 1.96 * se)
  ci_high <- (est + 1.96 * se)
  
  # append
  results_list[[var]] <- data.frame(
    variable = var,
    effect_size = effect_size,
    ci_low = ci_low,
    ci_high = ci_high,
    p_value = pval
  )
}

# results df (all peptides)
results_df <- do.call(rbind, results_list)
# order by p-value (lowest to highest)
results_df$variable <- factor(results_df$variable, levels = results_df$variable[order(1-results_df$p_value)])

# forest plot
ggplot(results_df, aes(x = variable, y = effect_size)) +
  geom_point() +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  #geom_text(aes(label = sprintf("p = %.3f", p_value)), hjust = -4, size = 3) +
  coord_flip() +
  labs(
    title = "Forest Plot of Market Diet Index Effect Sizes",
    y = "Estimate (95% CI)",
    x = "Peptide"
  ) +
  theme_pomol_leg()