Mediation and Software

Setup

library(broom)
library(compositions)
library(glue)
library(mediation)
library(progress)
library(tidyverse)

filter_mean <- function(x, threshold = 4) {
  col_means <- colMeans(x)
  x[, col_means > threshold]
}

simplify_tax_names <- function(x) {
  colnames(x) <- str_extract(colnames(x), "g__[A-z]+")
  x
}

simplify_metab_names <- function(x) {
  colnames(x) <- colnames(x) |>
    str_remove_all("[[:punct:|NA| ]]") |>
    str_replace_all("-", "_")
  x
}

Preprocessing

Sys.setenv("VROOM_CONNECTION_SIZE" = 5e6)
taxa <- read_tsv("https://raw.githubusercontent.com/borenstein-lab/microbiome-metabolome-curated-data/main/data/processed_data/FRANZOSA_IBD_2019/genera.tsv")[, -1]
metabolites <- read_tsv("https://raw.githubusercontent.com/borenstein-lab/microbiome-metabolome-curated-data/main/data/processed_data/FRANZOSA_IBD_2019/mtb.tsv")[, -1]
metadata <- read_tsv("https://github.com/borenstein-lab/microbiome-metabolome-curated-data/raw/main/data/processed_data/FRANZOSA_IBD_2019/metadata.tsv")

taxa <- clr(taxa) |>
  filter_mean() |>
  simplify_tax_names()

metabolites <- log(1 + metabolites) |>
  filter_mean(threshold = 3) |>
  simplify_metab_names()
  
combined <- metabolites |>
  bind_cols(taxa) |>
  bind_cols(metadata) |>
  mutate(treatment = ifelse(Study.Group == "Control", "control", "treatment"))

Fit Models

taxa_names <- colnames(taxa)[1:20]
metabolite_names <- colnames(metabolites)[1:20]
mediations <- list()

pb <- progress_bar$new(total = 400, format = "[:bar] :percent eta: :eta")
for (i in seq_along(metabolite_names)) {
  for (i_prime in seq_along(taxa_names)) {
    fit1 <- lm(formula(glue("{metabolite_names[i]} ~ treatment")), data = combined)
    fit2 <- lm(formula(glue("{metabolite_names[i]} ~ treatment + {taxa_names[i_prime]}")), data = combined)
    pair <- glue("{metabolite_names[i]}_{taxa_names[i_prime]}")
    mediations[[pair]] <- mediate(fit1, fit2, sims = 100, treat = "treatment", mediator = taxa_names[i_prime], control.value = "control", treat.value = "treatment")
    pb$tick()
  }
}

Summary Statistics

p_values <- map_dfr(mediations, ~ tidy(.), .id = "pair") |>
  mutate(
    p.value = ifelse(p.value == 0, 5e-3, p.value)
  ) |>
  separate_wider_regex(pair, c(metabolite = ".*", "_g__", taxon = ".*")) |>
  mutate(term = case_when(
    term == "acme_0" ~ "Indirect(0)",
    term == "acme_1" ~ "Indirect(1)",
    term == "ade_0" ~ "Direct(0)",
    term == "ade_1" ~ "Direct(1)"
  ))

ggplot(p_values) +
  geom_point(aes(estimate, -log(p.value))) +
  facet_wrap(~ term) +
  labs(
    x = "Effect Estimate",
    y = expression(-log(p))
  ) +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 12),
    strip.text = element_text(size = 14)
  )

p_values |>
  filter(term == "Indirect(0)", abs(estimate) > .4, -log(p.value) > 4) |>
  arrange(-abs(estimate)) |>
  mutate(across(estimate:p.value, round, 3))
#> # A tibble: 92 x 6
#>    metabolite        taxon                 term       estimate std.error p.value
#>    <chr>             <chr>                 <chr>         <dbl>     <dbl>   <dbl>
#>  1 C18_eg_Clser_0018 Streptomyces          Indirect(~     3.09     1.26    0.005
#>  2 C18_eg_Clser_0011 Copromorpha           Indirect(~    -2.74     0.443   0.005
#>  3 C18_eg_Clser_0011 Faecousia             Indirect(~    -2.70     0.355   0.005
#>  4 C18_eg_Clser_0018 Faecousia             Indirect(~     2.45     0.745   0.005
#>  5 C18_eg_Clser_0011 Lachnoclostridium_B   Indirect(~    -2.23     0.411   0.005
#>  6 C18_eg_Clser_0011 Marvinbryantia        Indirect(~    -2.21     0.461   0.005
#>  7 C18_eg_Clser_0023 Marvinbryantia        Indirect(~    -2.19     0.555   0.005
#>  8 C18_eg_Clser_0011 Anaeromassilibacillus Indirect(~    -2.15     0.378   0.005
#>  9 C18_eg_Clser_0018 Anaeromassilibacillus Indirect(~     2.15     0.629   0.005
#> 10 C18_eg_Clser_0023 Faecousia             Indirect(~    -2.1      0.458   0.005
#> # i 82 more rows

Interpretation

asv <- str_c("g__", "Faecousia")
mtb <- "C18_eg_Clser_0011"

ggplot(combined) +
  geom_boxplot(aes(treatment, .data[[asv]], fill = treatment))


ggplot(combined) +
  geom_boxplot(aes(treatment, .data[[mtb]], fill = treatment)) +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 12),
    strip.text = element_text(size = 14)
  )


ggplot(combined, aes(.data[[asv]], .data[[mtb]])) +
  stat_smooth(method = "lm", col = "#d3d3d3") +
  geom_point(aes(col = treatment), position = position_jitter(w = 0.05)) +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 12),
    strip.text = element_text(size = 14)
  )