setwd("C:/Users/ROSFRB/Desktop/Mult2")
#The earthy rainbow pallette.
# ---- Earthy palette (15 colors) ----
pal_earth15 <- c(
"#2F5D50", # pine
"#6B8F71", # sage
"#8C9C68", # olive
"#4E7A48", # moss
"#C9A227", # ochre
"#B98E5A", # tan
"#D8C7A0", # sand
"#C46D3B", # terracotta
"#B56969", # clay
"#8C4A2F", # rust
"#7D5A4F", # umber
"#5B7065", # slate
"#2F6F72", # deep teal
"#3F4E6B", # blue slate
"#50514F" # stone
)
# Convenience ggplot2 scales
scale_fill_earth15 <- function(...) ggplot2::scale_fill_manual(values = pal_earth15, ...)
scale_color_earth15 <- function(...) ggplot2::scale_color_manual(values = pal_earth15, ...)
# Packages
install_if_missing <- function(pkgs){
miss <- pkgs[!vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)]
if(length(miss)) install.packages(miss, dependencies = TRUE)
}
pkgs <- c("tidyverse","janitor","matrixStats","sva")
install_if_missing(pkgs)
lapply(pkgs, library, character.only = TRUE)
## Warning: package 'tidyverse' was built under R version 4.5.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'janitor' was built under R version 4.5.1
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
## Warning: package 'matrixStats' was built under R version 4.5.1
##
## Attaching package: 'matrixStats'
##
## The following object is masked from 'package:dplyr':
##
## count
##
## Loading required package: mgcv
## Loading required package: nlme
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:dplyr':
##
## collapse
##
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
##
## Attaching package: 'genefilter'
##
## The following objects are masked from 'package:matrixStats':
##
## rowSds, rowVars
##
## The following object is masked from 'package:readr':
##
## spec
##
## Loading required package: BiocParallel
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "janitor" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "matrixStats" "janitor" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "sva" "BiocParallel" "genefilter" "mgcv" "nlme"
## [6] "matrixStats" "janitor" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
BiocManager::install("sva")
## Bioconductor version 3.21 (BiocManager 1.30.26), R 4.5.0 (2025-04-11 ucrt)
## Warning: package(s) not installed when version(s) same as or greater than current; use
## `force = TRUE` to re-install: 'sva'
## Installation paths not writeable, unable to update packages
## path: C:/Program Files/R/R-4.5.0/library
## packages:
## boot, lattice, Matrix, mgcv
## Old packages: 'cli', 'DESeq2', 'purrr', 'RcppArmadillo', 'stringr', 'tibble',
## 'xfun'
library(pheatmap)
## Warning: package 'pheatmap' was built under R version 4.5.1
library(uwot)
## Warning: package 'uwot' was built under R version 4.5.1
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(vegan)
## Warning: package 'vegan' was built under R version 4.5.1
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.5.1
install.packages("sva")
## Warning: package 'sva' is in use and will not be installed
library("sva")
# Paths
# Set this if your CSVs are elsewhere
base_dir <- "."
paths <- list(
metadata = file.path(base_dir, "metadata.csv"),
otu_table = file.path(base_dir, "otu_table.csv"),
metabolites = file.path(base_dir, "metabolites.csv"),
tx_counts = file.path(base_dir, "transcriptomics_counts.csv"),
tx_logcpm = file.path(base_dir, "transcriptomics_logCPM.csv"),
feature_annotations = file.path(base_dir, "feature_annotations.csv"),
ground_truth_edges = file.path(base_dir, "ground_truth_edges.csv")
)
# Load
meta <- read.csv(paths$metadata, check.names = FALSE) %>% clean_names()
fa <- read.csv(paths$feature_annotations, check.names = FALSE)
otu <- read.csv(paths$otu_table, check.names = FALSE, row.names = 1)
met <- read.csv(paths$metabolites, check.names = FALSE)
txc <- read.csv(paths$tx_counts, check.names = FALSE, row.names = 1)
# Prefer provided logCPM, else compute a simple logCPM
if (file.exists(paths$tx_logcpm)) {
txl <- read.csv(paths$tx_logcpm, check.names = FALSE, row.names = 1)
} else {
message("transcriptomics_logCPM.csv not found — computing a simple logCPM from counts.")
libsize <- rowSums(txc)
cpm <- sweep(txc, 1, libsize, "/") * 1e6
txl <- log2(cpm + 1)
}
# Harmonize samples
# Ensure all layers share the same sample set & order
samples_all <- Reduce(intersect, list(
meta$sample_id,
rownames(otu),
met$sample_id,
rownames(txc),
rownames(txl)
))
stopifnot(length(samples_all) > 0)
meta <- meta %>% filter(sample_id %in% samples_all)
meta <- meta %>% mutate(
group = factor(group, levels = c("Vegan","Vegetarian","Carnivore")),
batch = factor(batch),
sex = factor(sex)
)
otu <- otu[ samples_all, , drop = FALSE ]
met <- met %>% filter(sample_id %in% samples_all)
txc <- txc[ samples_all, , drop = FALSE ]
txl <- txl[ samples_all, , drop = FALSE ]
# Align row order across tables
ord <- meta$sample_id
otu <- otu[ord, , drop = FALSE]
met <- met %>% arrange(match(sample_id, ord))
txc <- txc[ord, , drop = FALSE]
txl <- txl[ord, , drop = FALSE]
# Feature sets & labels
# From feature_annotations.csv
taxa <- fa %>% filter(type == "Taxon") %>% pull(feature)
metabolites <- fa %>% filter(type == "Metabolite") %>% pull(feature)
genes_pro <- fa %>% filter(type == "Gene", class == "Pro_inflammatory") %>% pull(feature)
genes_anti <- fa %>% filter(type == "Gene", class == "Anti_inflammatory") %>% pull(feature)
# Microbiome preparation
# Total-sum scaling (proportions)
otu_mat <- as.matrix(otu)
row_sums <- rowSums(otu_mat)
if (any(row_sums == 0)) {
warning("Some samples have zero total counts; adding a small pseudocount.")
otu_mat[row_sums == 0, ] <- otu_mat[row_sums == 0, ] + 1L
row_sums <- rowSums(otu_mat)
}
otu_prop <- sweep(otu_mat, 1, row_sums, "/")
# CLR transform with small pseudocount (robust for compositional analyses)
pseudocount <- 0.5
logp <- log(otu_prop + pseudocount / row_sums)
clr_center <- rowMeans(logp)
otu_clr <- sweep(logp, 1, clr_center, "-")
# Metabolomics preparation
# Keep only metabolite columns (safety)
met_mat <- met %>% select(any_of(metabolites)) %>% as.matrix()
# Log-transform
met_log <- log1p(met_mat)
met_scaled <- scale(met_log)
# SCFA composite: (Acetate + Propionate + Butyrate) - (Isobutyrate + Isovalerate)
scfa_cols <- intersect(colnames(met_mat), c("Acetate","Propionate","Butyrate","Isobutyrate","Isovalerate"))
stopifnot(all(c("Acetate","Propionate","Butyrate","Isobutyrate","Isovalerate") %in% scfa_cols))
scfa_score <- with(as.data.frame(met_mat),
Acetate + Propionate + 1.3*Butyrate - (Isobutyrate + Isovalerate)
)
scfa_score_z <- as.numeric(scale(scfa_score))
# batch-correct metabolites (recommended for DR/clustering)
# Adjusting for group + BMI/sex/age in the model matrix
combat_mod <- model.matrix(~ group + bmi + sex + age, data = meta)
met_bc <- sva::ComBat(dat = t(met_scaled),
batch = meta$batch,
mod = combat_mod,
par.prior = TRUE, prior.plots = FALSE)
## Found2batches
## Adjusting for5covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
met_bc <- t(met_bc)
# Transcriptomics preparation
tx_logcpm <- as.matrix(txl)
tx_logcpm_scaled <- scale(tx_logcpm)
# Gene-set scores (mean z across member genes)
score_set <- function(gene_set, zmat){
keep <- intersect(gene_set, colnames(zmat))
if (length(keep) == 0) return(rep(NA_real_, nrow(zmat)))
rowMeans(zmat[, keep, drop = TRUE])
}
pro_score <- score_set(genes_pro, tx_logcpm_scaled)
anti_score <- score_set(genes_anti, tx_logcpm_scaled)
gene_scores <- tibble(
sample_id = ord,
pro_infl_score = pro_score,
anti_infl_score = anti_score
)
# Master objects for analysis
# Matrices ready for: correlations, DR, clustering, networks
X_micro_clr <- otu_clr
X_metab_bc <- met_bc
X_transcript <- tx_logcpm_scaled
# Convenience long forms for plotting/networking
micro_long <- as_tibble(X_micro_clr, rownames = "sample_id") %>%
pivot_longer(-sample_id, names_to = "feature", values_to = "clr_abundance") %>%
left_join(meta, by = "sample_id") %>%
left_join(fa %>% filter(type=="Taxon") %>% select(feature, class), by = c("feature"="feature"))
metab_long <- as_tibble(X_metab_bc, rownames = "sample_id") %>%
pivot_longer(-sample_id, names_to = "metabolite", values_to = "scaled_value") %>%
left_join(meta, by = "sample_id") %>%
left_join(fa %>% filter(type=="Metabolite") %>% select(feature, class),
by = c("metabolite"="feature"))
tx_long <- as_tibble(X_transcript, rownames = "sample_id") %>%
pivot_longer(-sample_id, names_to = "gene", values_to = "z_expr") %>%
left_join(meta, by = "sample_id") %>%
left_join(
fa %>% filter(type=="Gene") %>% select(feature, class),
by = c("gene"="feature")
)
# Add scores to metadata
meta_enhanced <- meta %>%
mutate(scfa_score = scfa_score_z) %>%
left_join(gene_scores, by = "sample_id")
# Ground truth edges
gt_edges <- NULL
if (file.exists(paths$ground_truth_edges)) {
gt_edges <- read.csv(paths$ground_truth_edges, check.names = FALSE)
}
# Quick sanity checks
message("Samples per group:")
## Samples per group:
print(table(meta_enhanced$group))
##
## Vegan Vegetarian Carnivore
## 20 20 20
message("SCFA score by group (mean ± sd):")
## SCFA score by group (mean ± sd):
print(meta_enhanced %>% group_by(group) %>%
summarise(mean = mean(scfa_score), sd = sd(scfa_score), .groups="drop"))
## # A tibble: 3 × 3
## group mean sd
## <fct> <dbl> <dbl>
## 1 Vegan 0.900 0.984
## 2 Vegetarian 0.0198 0.372
## 3 Carnivore -0.920 0.503
# Save prepared objects
saveRDS(list(
metadata = meta_enhanced,
feature_annotations= fa,
X_micro_clr = X_micro_clr,
X_metab_bc = X_metab_bc,
X_transcript = X_transcript,
micro_long = micro_long,
metab_long = metab_long,
tx_long = tx_long,
ground_truth_edges = gt_edges
), file = file.path(base_dir, "prepared_objects.rds"))
# write a slim csv with key sample-level scores for quick plotting
meta_enhanced %>%
select(sample_id, group, batch, sex, age, bmi = bmi,
scfa_score, pro_infl_score, anti_infl_score) %>%
write.csv(file.path(base_dir, "sample_level_scores.csv"), row.names = FALSE)
#First look and sanity check of compiled data
# Load prepared objects
objs <- readRDS("prepared_objects.rds")
meta <- objs$metadata
# Boxplots: SCFA score by group
SCFA <- ggplot(meta, aes(x = group, y = scfa_score, fill = group)) +
geom_boxplot(outlier.shape = NA, alpha = 0.7) +
geom_jitter(width = 0.2, size = 1, alpha = 0.8) +
labs(title = "SCFA score across diet groups",
y = "Composite SCFA score (z)", x = "Diet group") +
theme_minimal(base_size = 14)+
theme(legend.position="none")+
scale_fill_earth15()
# Boxplots: gene set scores
meta_long <- meta %>%
select(sample_id, group, pro_infl_score, anti_infl_score) %>%
pivot_longer(cols = c(pro_infl_score, anti_infl_score),
names_to = "score_type", values_to = "value")
GeneS <- ggplot(meta_long, aes(x = group, y = value, fill = group)) +
geom_boxplot(outlier.shape = NA, alpha = 0.7) +
geom_jitter(width = 0.2, size = 1, alpha = 0.8) +
facet_wrap(~score_type, scales = "free_y") +
labs(title = "Gene set scores across diet groups",
y = "Score (mean z)", x = "Diet group") +
theme_minimal(base_size = 14)+
scale_fill_earth15()
# Correlation: SCFA vs Pro-inflammatory
SCFAPRO <- ggplot(meta, aes(x = scfa_score, y = pro_infl_score, color = group)) +
geom_point(size = 3, alpha = 0.8) +
geom_smooth(method = "lm", se = TRUE) +
labs(title = "SCFA vs Pro-inflammatory score",
x = "SCFA score (z)", y = "Pro-inflammatory gene score") +
theme_minimal(base_size = 14)+
theme(legend.position="none")+
scale_color_earth15()
# Correlation: SCFA vs Anti-inflammatory
SCFAANTI <- ggplot(meta, aes(x = scfa_score, y = anti_infl_score, color = group)) +
geom_point(size = 3, alpha = 0.8) +
geom_smooth(method = "lm", se = TRUE) +
labs(title = "SCFA vs Anti-inflammatory score",
x = "SCFA score (z)", y = "Anti-inflammatory gene score") +
theme_minimal(base_size = 14)+
scale_color_earth15()
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.5.1
##
## Attaching package: 'patchwork'
## The following object is masked from 'package:genefilter':
##
## area
Sanity <- (SCFA + GeneS) / (SCFAPRO + SCFAANTI)
print(Sanity)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
ggsave("Sanity.png", Sanity, width = 12, height = 12, dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Palette & scales
pal_earth15 <- c(
"#2F5D50","#6B8F71","#8C9C68","#4E7A48","#C9A227",
"#B98E5A","#D8C7A0","#C46D3B","#B56969","#8C4A2F",
"#7D5A4F","#5B7065","#3E5C76","#A3B18A","#264653"
)
scale_fill_earth15 <- function(...) ggplot2::scale_fill_manual(values = pal_earth15, ...)
scale_color_earth15 <- function(...) ggplot2::scale_color_manual(values = pal_earth15, ...)
# Load prepared objects
objs <- readRDS("prepared_objects.rds")
meta <- objs$metadata
X_micro_clr <- objs$X_micro_clr
X_metab_bc <- objs$X_metab_bc
X_transcript <- objs$X_transcript
dir.create("embeddings", showWarnings = FALSE)
# A small plotting helper (single purpose)
plot_xy <- function(df, x, y, title){
ggplot(df, aes(.data[[x]], .data[[y]], color = group, fill = group)) +
stat_ellipse(aes(fill = group),
geom = "polygon", type = "norm", level = 0.68,
alpha = 0.30, color = "black", linewidth = 0.6) +
geom_point(size = 2.6, alpha = 0.95) +
scale_color_earth15(name = "Group") +
scale_fill_earth15(name = "Group") +
labs(title = title, x = x, y = y) +
theme_minimal(base_size = 13)
}
# MICROBIOME (CLR)
# PCA
pc_micro <- prcomp(X_micro_clr, center = TRUE, scale. = FALSE)
var_micro <- (pc_micro$sdev^2) / sum(pc_micro$sdev^2)
df_pc_micro <- as_tibble(pc_micro$x[, 1:3]) %>%
mutate(sample_id = meta$sample_id, group = meta$group)
p1 <- plot_xy(df_pc_micro, "PC1", "PC2",
sprintf("Microbiome (CLR) – PCA | PC1 %.1f%%, PC2 %.1f%%",
100*var_micro[1], 100*var_micro[2]))
ggsave("embeddings/pca_microbiome.png", p1, width = 6.5, height = 5, dpi = 300)
write.csv(cbind(sample_id = meta$sample_id, df_pc_micro), "embeddings/pca_microbiome_scores.csv", row.names = FALSE)
# PCoA on Aitchison (Euclidean on CLR)
d_ait <- dist(X_micro_clr, method = "euclidean")
fit_pcoa <- cmdscale(d_ait, k = 3, eig = TRUE)
df_pcoa_micro <- tibble(PCo1 = fit_pcoa$points[,1],
PCo2 = fit_pcoa$points[,2],
sample_id = meta$sample_id,
group = meta$group)
# percent variance (positive eigenvalues only)
evals <- fit_pcoa$eig; pve <- evals[evals>0]/sum(evals[evals>0])
p2 <- plot_xy(df_pcoa_micro, "PCo1", "PCo2",
sprintf("Microbiome – PCoA (Aitchison) | PCo1 %.1f%%, PCo2 %.1f%%",
100*ifelse(length(pve)>=1,pve[1],NA),
100*ifelse(length(pve)>=2,pve[2],NA)))
ggsave("embeddings/pcoa_microbiome_aitchison.png", p2, width = 6.5, height = 5, dpi = 300)
write.csv(df_pcoa_micro, "embeddings/pcoa_microbiome_coords.csv", row.names = FALSE)
# UMAP (on CLR)
set.seed(123)
U_micro <- umap(X_micro_clr, n_neighbors = 15, min_dist = 0.15, metric = "euclidean")
df_umap_micro <- tibble(UMAP1 = U_micro[,1], UMAP2 = U_micro[,2],
sample_id = meta$sample_id, group = meta$group)
p3 <- plot_xy(df_umap_micro, "UMAP1", "UMAP2", "Microbiome (CLR) – UMAP")
ggsave("embeddings/umap_microbiome.png", p3, width = 6.5, height = 5, dpi = 300)
write.csv(df_umap_micro, "embeddings/umap_microbiome_coords.csv", row.names = FALSE)
# METABOLOMICS
# PCA
pc_met <- prcomp(X_metab_bc, center = TRUE, scale. = FALSE)
var_met <- (pc_met$sdev^2)/sum(pc_met$sdev^2)
df_pc_met <- as_tibble(pc_met$x[,1:3]) %>%
mutate(sample_id = meta$sample_id, group = meta$group)
p4 <- plot_xy(df_pc_met, "PC1", "PC2",
sprintf("Metabolomics – PCA | PC1 %.1f%%, PC2 %.1f%%",
100*var_met[1], 100*var_met[2]))
ggsave("embeddings/pca_metabolomics.png", p4, width = 6.5, height = 5, dpi = 300)
write.csv(cbind(sample_id = meta$sample_id, df_pc_met), "embeddings/pca_metabolomics_scores.csv", row.names = FALSE)
# UMAP
set.seed(123)
U_met <- umap(X_metab_bc, n_neighbors = 15, min_dist = 0.15, metric = "euclidean")
df_umap_met <- tibble(UMAP1 = U_met[,1], UMAP2 = U_met[,2],
sample_id = meta$sample_id, group = meta$group)
df_umap_met <- tibble(UMAP1 = U_met[,1], UMAP2 = U_met[,2],
sample_id = meta$sample_id, group = meta$group)
p5 <- plot_xy(df_umap_met, "UMAP1", "UMAP2", "Metabolomics – UMAP")
ggsave("embeddings/umap_metabolomics.png", p5, width = 6.5, height = 5, dpi = 300)
write.csv(df_umap_met, "embeddings/umap_metabolomics_coords.csv", row.names = FALSE)
# TRANSCRIPTOMICS
# PCA
pc_tx <- prcomp(X_transcript, center = TRUE, scale. = FALSE)
var_tx <- (pc_tx$sdev^2)/sum(pc_tx$sdev^2)
df_pc_tx <- as_tibble(pc_tx$x[,1:3]) %>%
mutate(sample_id = meta$sample_id, group = meta$group)
p6 <- plot_xy(df_pc_tx, "PC1", "PC2",
sprintf("Transcriptomics – PCA | PC1 %.1f%%, PC2 %.1f%%",
100*var_tx[1], 100*var_tx[2]))
ggsave("embeddings/pca_transcriptomics.png", p6, width = 6.5, height = 5, dpi = 300)
write.csv(cbind(sample_id = meta$sample_id, df_pc_tx), "embeddings/pca_transcriptomics_scores.csv", row.names = FALSE)
# UMAP
set.seed(123)
U_tx <- umap(X_transcript, n_neighbors = 15, min_dist = 0.15, metric = "euclidean")
df_umap_tx <- tibble(UMAP1 = U_tx[,1], UMAP2 = U_tx[,2],
sample_id = meta$sample_id, group = meta$group)
p7 <- plot_xy(df_umap_tx, "UMAP1", "UMAP2", "Transcriptomics – UMAP")
ggsave("embeddings/umap_transcriptomics.png", p7, width = 6.5, height = 5, dpi = 300)
write.csv(df_umap_tx, "embeddings/umap_transcriptomics_coords.csv", row.names = FALSE)
# PCA + PCoA + UMAP for microbiome in one row
A <- (p1 | p2 | p3) + plot_annotation(title = "Microbiome – PCA, PCoA (Aitchison), UMAP")
# PCA + UMAP for metabolomics and transcriptomics in one row
B <- ((p4 | p5) / (p6 | p7)) + plot_annotation(title = "Metabolomics & Transcriptomics – PCA and UMAP")
ggsave("PCAUMPAPCOA.png", A, width = 20, height = 6, dpi = 300)
ggsave("PCAUMPA.png", B, width = 12, height = 6, dpi = 300)
# Load prepared objects
objs <- readRDS("prepared_objects.rds")
meta <- objs$metadata
X_micro_clr <- objs$X_micro_clr
X_metab_bc <- objs$X_metab_bc
X_transcript <- objs$X_transcript
# Gene set scores (already in metadata)
gene_scores <- meta %>%
select(sample_id, pro_infl_score, anti_infl_score)
# Microbiome vs metabolites
cor_micro_met <- cor(X_micro_clr, X_metab_bc, method = "spearman")
A <- pheatmap(cor_micro_met,
color = colorRampPalette(c("#3885B6","white","#FA7F2E"))(50),
main = "Microbiome (CLR taxa) vs Metabolites",
fontsize_row = 6, fontsize_col = 8,
clustering_method = "complete")
# Metabolites vs gene set scores
df_met <- as_tibble(X_metab_bc) %>% mutate(sample_id = meta$sample_id)
df_long <- df_met %>%
pivot_longer(-sample_id, names_to = "metabolite", values_to = "value") %>%
left_join(gene_scores, by = "sample_id")
cor_met_genes <- df_long %>%
group_by(metabolite) %>%
summarise(
cor_pro = cor(value, pro_infl_score, method = "spearman"),
cor_anti = cor(value, anti_infl_score, method = "spearman"),
.groups = "drop"
)
print(cor_met_genes)
## # A tibble: 10 × 3
## metabolite cor_pro cor_anti
## <chr> <dbl> <dbl>
## 1 Acetate -0.945 0.928
## 2 Butyrate -0.949 0.939
## 3 Formate 0.815 -0.816
## 4 Isobutyrate 0.783 -0.779
## 5 Isovalerate 0.709 -0.700
## 6 Lactate -0.486 0.469
## 7 PhenylaceticAcid 0.602 -0.606
## 8 Propionate -0.948 0.942
## 9 Succinate 0.617 -0.582
## 10 Valerate -0.698 0.700
#Heatmap metabolites vs gene scores
mat_met_gene <- cor_met_genes %>%
column_to_rownames("metabolite") %>%
as.matrix()
B <- pheatmap(mat_met_gene,
color = colorRampPalette(c("#3885B6","white","#FA7F2E"))(50),
main = "Metabolites vs Gene Set Scores",
display_numbers = TRUE)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
g <- arrangeGrob(A[[4]], B[[4]], ncol = 2)
ggsave("cor_heatmaps.png", g, width = 12, height = 6, dpi = 300)