library(broom)
library(compositions)
library(glue)
library(mediation)
library(progress)
library(tidyverse)Mediation and Software
Setup
filter_mean <- function(x, threshold = 4) {
col_means <- colMeans(x)
x[, col_means > threshold]
}
simplify_tax_names <- function(x) {
colnames(x) <- str_extract(colnames(x), "g__[A-z]+")
x
}
simplify_metab_names <- function(x) {
colnames(x) <- colnames(x) |>
str_remove_all("[[:punct:|NA| ]]") |>
str_replace_all("-", "_")
x
}Preprocessing
Sys.setenv("VROOM_CONNECTION_SIZE" = 5e6)
taxa <- read_tsv("https://raw.githubusercontent.com/borenstein-lab/microbiome-metabolome-curated-data/main/data/processed_data/FRANZOSA_IBD_2019/genera.tsv")[, -1]
metabolites <- read_tsv("https://raw.githubusercontent.com/borenstein-lab/microbiome-metabolome-curated-data/main/data/processed_data/FRANZOSA_IBD_2019/mtb.tsv")[, -1]
metadata <- read_tsv("https://github.com/borenstein-lab/microbiome-metabolome-curated-data/raw/main/data/processed_data/FRANZOSA_IBD_2019/metadata.tsv")taxa <- clr(taxa) |>
filter_mean() |>
simplify_tax_names()
metabolites <- log(1 + metabolites) |>
filter_mean(threshold = 3) |>
simplify_metab_names()
combined <- metabolites |>
bind_cols(taxa) |>
bind_cols(metadata) |>
mutate(treatment = ifelse(Study.Group == "Control", "control", "treatment"))Fit Models
taxa_names <- colnames(taxa)[1:20]
metabolite_names <- colnames(metabolites)[1:20]
mediations <- list()pb <- progress_bar$new(total = 400, format = "[:bar] :percent eta: :eta")
for (i in seq_along(metabolite_names)) {
for (i_prime in seq_along(taxa_names)) {
fit1 <- lm(formula(glue("{metabolite_names[i]} ~ treatment")), data = combined)
fit2 <- lm(formula(glue("{metabolite_names[i]} ~ treatment + {taxa_names[i_prime]}")), data = combined)
pair <- glue("{metabolite_names[i]}_{taxa_names[i_prime]}")
mediations[[pair]] <- mediate(fit1, fit2, sims = 100, treat = "treatment", mediator = taxa_names[i_prime], control.value = "control", treat.value = "treatment")
pb$tick()
}
}Summary Statistics
p_values <- map_dfr(mediations, ~ tidy(.), .id = "pair") |>
mutate(
p.value = ifelse(p.value == 0, 5e-3, p.value)
) |>
separate_wider_regex(pair, c(metabolite = ".*", "_g__", taxon = ".*")) |>
mutate(term = case_when(
term == "acme_0" ~ "Indirect(0)",
term == "acme_1" ~ "Indirect(1)",
term == "ade_0" ~ "Direct(0)",
term == "ade_1" ~ "Direct(1)"
))
ggplot(p_values) +
geom_point(aes(estimate, -log(p.value))) +
facet_wrap(~ term) +
labs(
x = "Effect Estimate",
y = expression(-log(p))
) +
theme(
axis.title = element_text(size = 16),
axis.text = element_text(size = 12),
strip.text = element_text(size = 14)
)p_values |>
filter(term == "Indirect(0)", abs(estimate) > .4, -log(p.value) > 4) |>
arrange(-abs(estimate)) |>
mutate(across(estimate:p.value, round, 3))
#> # A tibble: 92 x 6
#> metabolite taxon term estimate std.error p.value
#> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 C18_eg_Clser_0018 Streptomyces Indirect(~ 3.09 1.26 0.005
#> 2 C18_eg_Clser_0011 Copromorpha Indirect(~ -2.74 0.443 0.005
#> 3 C18_eg_Clser_0011 Faecousia Indirect(~ -2.70 0.355 0.005
#> 4 C18_eg_Clser_0018 Faecousia Indirect(~ 2.45 0.745 0.005
#> 5 C18_eg_Clser_0011 Lachnoclostridium_B Indirect(~ -2.23 0.411 0.005
#> 6 C18_eg_Clser_0011 Marvinbryantia Indirect(~ -2.21 0.461 0.005
#> 7 C18_eg_Clser_0023 Marvinbryantia Indirect(~ -2.19 0.555 0.005
#> 8 C18_eg_Clser_0011 Anaeromassilibacillus Indirect(~ -2.15 0.378 0.005
#> 9 C18_eg_Clser_0018 Anaeromassilibacillus Indirect(~ 2.15 0.629 0.005
#> 10 C18_eg_Clser_0023 Faecousia Indirect(~ -2.1 0.458 0.005
#> # i 82 more rowsInterpretation
asv <- str_c("g__", "Faecousia")
mtb <- "C18_eg_Clser_0011"
ggplot(combined) +
geom_boxplot(aes(treatment, .data[[asv]], fill = treatment))
ggplot(combined) +
geom_boxplot(aes(treatment, .data[[mtb]], fill = treatment)) +
theme(
axis.title = element_text(size = 16),
axis.text = element_text(size = 12),
strip.text = element_text(size = 14)
)
ggplot(combined, aes(.data[[asv]], .data[[mtb]])) +
stat_smooth(method = "lm", col = "#d3d3d3") +
geom_point(aes(col = treatment), position = position_jitter(w = 0.05)) +
theme(
axis.title = element_text(size = 16),
axis.text = element_text(size = 12),
strip.text = element_text(size = 14)
)