library(phyloseq)
library(ggplot2)
library(microbiome)
##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2022 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
## Attaching package: 'microbiome'
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:base':
##
## transform
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(vegan)
## Loading required package: permute
##
## Attaching package: 'vegan'
## The following object is masked from 'package:microbiome':
##
## diversity
library(fantaxtic)
##
## Attaching package: 'fantaxtic'
## The following object is masked from 'package:microbiome':
##
## top_taxa
library(forcats)
library(picante)
## Loading required package: ape
##
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
##
## where
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
library(phytools)
## Loading required package: maps
##
## Attaching package: 'phytools'
## The following object is masked from 'package:vegan':
##
## scores
library(rstatix)
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
seqtab.nochim <- readRDS("../00_InputFiles/04_RDSfilesFromWynton/Figure01_CME_Stab_seqtab.rds")
taxa <- readRDS("../00_InputFiles/04_RDSfilesFromWynton/Figure01_CME_Stab_taxonomy.rds")
fitGTR <- readRDS("../00_InputFiles/04_RDSfilesFromWynton/Figure01_CME_Stab_fitGTR.rds")
meta_table <- read.csv("../00_InputFiles/05_MetdataFiles/metadata.csv", row.names=1, check.names=FALSE)
# 2. Build and Filter Phyloseq Object
samdf <- sample_data(meta_table)
# Extract the unrooted tree from the fitGTR object
unrooted_tree <- fitGTR$tree
ps1 <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE),
tax_table(taxa),
phy_tree(unrooted_tree),
samdf)
#ps1 <- subset_samples(phylo, SampleType != "Grazed_Lawn")
pseq0 <- subset_taxa(ps1, Family != "Mitochondria")
pseq1 <- prune_taxa(taxa_sums(pseq0) > 9, pseq0)
pseq <- prune_samples(sample_sums(pseq1) >= 9, pseq1)
# Save the final pseq object
saveRDS(pseq, "Figure01_CEP_Age_pseq.rds")
pseq
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6956 taxa and 18 samples ]
## sample_data() Sample Data: [ 18 samples by 7 sample variables ]
## tax_table() Taxonomy Table: [ 6956 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6956 tips and 6954 internal nodes ]
#Alpha Diversity
rm(list = ls())
pseq <- readRDS("Figure01_CEP_Age_pseq.rds")
#sample_data(pseq)
richness_data <- plot_richness(pseq, x="Replicate", measures=c("Observed", "Shannon"))$data
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the phyloseq package.
## Please report the issue at <https://github.com/joey711/phyloseq/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# 2. Summarize grouped by Day, Category, AND the variable (metric)
averages_data <- richness_data %>%
group_by(Day, Category, variable) %>%
dplyr::summarize(
avg_value = mean(value, na.rm = TRUE),
sd_value = sd(value, na.rm = TRUE),
.groups = "drop"
)
# 3. Plot everything in one go using facets
p <- ggplot(averages_data, aes(x = Day, y = avg_value, group = Category, color = Category, fill = Category)) +
geom_line() +
geom_errorbar(aes(ymin = avg_value - sd_value, ymax = avg_value + sd_value), width = 0.2) +
geom_point(size = 2, shape = 21) +
facet_wrap(variable ~ Category, scales = "free") +
labs(title = "Bacterial Alpha Diversity",
x = "Day",
y = "Mean Index Value") +
theme_classic(base_size = 12) +
scale_color_manual(values = c("#800000", "darkgreen", "darkblue")) +
scale_fill_manual(values = c("#800000", "darkgreen", "darkblue"))
print(p)
ggsave("Figure01_Alpha_Diversity.pdf", height = 5, width = 8)
##Statistics
day_levels <- c("D00", "Wk01", "Wk02")
richness_data$Day <- factor(richness_data$Day, levels = day_levels)
# 3. Perform T-Tests: Compare each Day to the one immediately following it
# We group by Category and Metric (variable) to isolate the trends
sequential_stats <- richness_data %>%
group_by(Category, variable) %>%
t_test(value ~ Day, comparisons = list(
c("D00", "Wk01"),
c("D00", "Wk02")
)) %>%
# Filter for clarity and adjust p-values for multiple comparisons if desired
select(Category, variable, group1, group2, n1, n2, p, p.adj) %>%
arrange(Category, variable, group1)
# 4. Print the statistical results table
print("--- Sequential T-Test Results (Bacterial Diversity Change Over Time) ---")
## [1] "--- Sequential T-Test Results (Bacterial Diversity Change Over Time) ---"
print(as.data.frame(sequential_stats))
## Category variable group1 group2 n1 n2 p p.adj
## 1 Fresh Observed D00 Wk01 3 3 0.524 0.524
## 2 Fresh Observed D00 Wk02 3 3 0.261 0.522
## 3 Fresh Shannon D00 Wk01 3 3 0.994 1.000
## 4 Fresh Shannon D00 Wk02 3 3 0.965 1.000
## 5 post 24 hrs Observed D00 Wk01 3 3 0.987 1.000
## 6 post 24 hrs Observed D00 Wk02 3 3 0.625 1.000
## 7 post 24 hrs Shannon D00 Wk01 3 3 0.778 1.000
## 8 post 24 hrs Shannon D00 Wk02 3 3 0.731 1.000
#Beta_Diversity ##PCoA Bray
rm(list = ls())
pseq <- readRDS("Figure01_CEP_Age_pseq.rds")
pseq.rel.2 <- transform(pseq, "compositional")
ord_mds_bray1 <- ordinate(pseq.rel.2, "PCoA", "bray")
p_bray<- plot_ordination(pseq.rel.2,
ord_mds_bray1,
shape = "Category",
color = "Day") +
geom_point(size = 4) +
# Centering title and using bold face
ggtitle("PCoA (Rel-transformed)") +
theme_classic(base_size = 12) +
theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
scale_color_manual(values = c("#73C474", "#157E3B", "#F89452", "red", "#DE402E", "#A50026","darkblue", "black"))
print(p_bray)
#ggsave("Figure01_PCoA_Bray.pdf", height = 3.5, width = 5)
dist_matrix <- phyloseq::distance(pseq.rel.2, method = "bray")
metadata <- data.frame(sample_data(pseq.rel.2))
adonis2(dist_matrix ~ Category * Day, by = "term", data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_matrix ~ Category * Day, data = metadata, by = "term")
## Df SumOfSqs R2 F Pr(>F)
## Category 1 0.34535 0.29460 6.8782 0.001 ***
## Day 2 0.11502 0.09811 1.1454 0.266
## Category:Day 2 0.10940 0.09332 1.0894 0.277
## Residual 12 0.60252 0.51397
## Total 17 1.17228 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##PCA CLR
pseq.clr <- transform(pseq, "clr")
ord_pca_clr <- ordinate(pseq.clr, "RDA", "euclidean")
p_clr <- plot_ordination(pseq.clr,
ord_pca_clr,
shape = "Category",
color = "Day") +
geom_point(size = 4) +
# Centering title and using bold face
ggtitle("PCA (CLR-transformed)") +
theme_classic(base_size = 12) +
theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
scale_color_manual(values = c("#73C474", "#157E3B", "#F89452", "red", "#DE402E", "#A50026","darkblue", "black"))
print(p_clr)
ggsave("Figure01_PCA_CLR.pdf", height = 3.5, width = 5)
##Distance within Stock CME ###Bray PERMANOVA
rm(list = ls())
ps <- readRDS("Figure01_CEP_Age_pseq.rds")
ps1 <- subset_samples(ps, Category == c("Fresh"))
pseq <- prune_taxa(taxa_sums(ps1)>1, ps1)
pseq.rel.2 <- transform(pseq, "compositional")
dist_matrix <- phyloseq::distance(pseq.rel.2, method = "bray")
metadata <- data.frame(sample_data(pseq.rel.2))
adonis2(dist_matrix ~ Day, data = metadata)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_matrix ~ Day, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## Model 2 0.09220 0.24547 0.976 0.703
## Residual 6 0.28342 0.75453
## Total 8 0.37563 1.00000
###CLR Euclidean PERMANOVA
set.seed(123)
pseq.clr <- transform(pseq, "clr")
dist_matrix_clr <- phyloseq::distance(pseq.clr, method = "euclidean")
metadata <- data.frame(sample_data(pseq.clr))
adonis_results <- adonis2(dist_matrix_clr ~ Day,
data = metadata,
by = "term", permutations = 999)
print(adonis_results)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_matrix_clr ~ Day, data = metadata, permutations = 999, by = "term")
## Df SumOfSqs R2 F Pr(>F)
## Day 2 19739 0.24954 0.9975 0.522
## Residual 6 59362 0.75046
## Total 8 79100 1.00000
##Distance within Post 24hrs ###Bray PERMANOVA
rm(list = ls())
ps <- readRDS("Figure01_CEP_Age_pseq.rds")
ps1 <- subset_samples(ps, Category != c("Fresh"))
pseq <- prune_taxa(taxa_sums(ps1)>1, ps1)
pseq.rel.2 <- transform(pseq, "compositional")
dist_matrix <- phyloseq::distance(pseq.rel.2, method = "bray")
metadata <- data.frame(sample_data(pseq.rel.2))
set.seed(123)
adonis2(dist_matrix ~ Day, data = metadata)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_matrix ~ Day, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## Model 2 0.13218 0.29296 1.243 0.002 **
## Residual 6 0.31902 0.70704
## Total 8 0.45120 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###CLR Euclidean PERMANOVA
set.seed(123865985)
pseq.clr <- transform(pseq, "clr")
dist_matrix_clr <- phyloseq::distance(pseq.clr, method = "euclidean")
metadata <- data.frame(sample_data(pseq.clr))
adonis_results <- adonis2(dist_matrix_clr ~ Day,
data = metadata,
by = "term", permutations = 999)
print(adonis_results)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_matrix_clr ~ Day, data = metadata, permutations = 999, by = "term")
## Df SumOfSqs R2 F Pr(>F)
## Day 2 20153 0.25695 1.0374 0.109
## Residual 6 58277 0.74305
## Total 8 78430 1.00000
#Alpha faith Index
rm(list=ls())
pseq <- readRDS("Figure01_CEP_Age_pseq.rds")
rooted_tree <- midpoint.root(phy_tree(pseq))
# 4. Calculate Faith's PD
# t(otu_table) ensures samples are rows as expected by picante
otu_mat <- as(otu_table(pseq), "matrix")
pd_results <- pd(otu_mat, rooted_tree, include.root = TRUE)
# 5. Prepare Data for Statistics
# Combine PD results with sample metadata
richness_data <- data.frame(sample_data(pseq))
richness_data$Faith_PD <- pd_results$PD
# Ensure Day is an ordered factor for sequential comparison
# Adjust the levels vector if you have more timepoints
day_levels <- c("D00", "Wk01", "Wk02")
richness_data$Day <- factor(richness_data$Day, levels = day_levels)
# 6. Perform Sequential T-Tests for Faith's PD
# Compares D00 vs Wk01, and Wk01 vs Wk02 within each Category
faith_stats <- richness_data %>%
group_by(Category) %>%
t_test(Faith_PD ~ Day, comparisons = list(
c("D00", "Wk01"),
c("D00", "Wk02")
)) %>%
select(Category, group1, group2, n1, n2, p, p.adj) %>%
arrange(Category, group1)
# 7. Print Results
print("--- Sequential T-Test Results: Faith's Phylogenetic Diversity ---")
## [1] "--- Sequential T-Test Results: Faith's Phylogenetic Diversity ---"
print(as.data.frame(faith_stats))
## Category group1 group2 n1 n2 p p.adj
## 1 Fresh D00 Wk01 3 3 0.651 1
## 2 Fresh D00 Wk02 3 3 0.640 1
## 3 post 24 hrs D00 Wk01 3 3 0.900 1
## 4 post 24 hrs D00 Wk02 3 3 0.957 1
# 8. Plot results for visualization
averages_pd <- richness_data %>%
group_by(Day, Category) %>%
dplyr::summarize(
avg_pd = mean(Faith_PD, na.rm = TRUE),
sd_pd = sd(Faith_PD, na.rm = TRUE),
.groups = "drop"
)
p_pd <- ggplot(averages_pd, aes(x = Day, y = avg_pd, group = Category, color = Category, fill = Category)) +
geom_line() +
geom_errorbar(aes(ymin = avg_pd - sd_pd, ymax = avg_pd + sd_pd), width = 0.2) +
geom_point(size = 3, shape = 21) +
labs(title = "Faith's Phylogenetic Diversity (PD) Over Time",
x = "Day",
y = "Mean Faith Index") +
theme_classic(base_size = 12) +
scale_color_manual(values = c("#800000", "darkgreen", "darkblue")) +
scale_fill_manual(values = c("#800000", "darkgreen", "darkblue")) + facet_wrap(~Category, scales ="free_y")
print(p_pd)
ggsave("Figure01_faith.pdf", height = 2.5, width = 8)
#Beta Unifrac PCoA
rm(list = ls())
ps1 <- readRDS("Figure01_CEP_Age_pseq.rds")
set.seed(123)
pseq <- prune_taxa(taxa_sums(ps1) > 1, ps1)
phy_tree(pseq) <- midpoint.root(phy_tree(pseq))
pseq_rel <- transform_sample_counts(pseq, function(x) x / sum(x))
dist_unweighted <- distance(pseq_rel, method = "unifrac", weighted = F)
ord_unweighted <- ordinate(pseq_rel, method = "PCoA", distance = dist_unweighted)
p_unweighted <- plot_ordination(pseq_rel, ord_unweighted, shape = "Category", color = "Day") +
geom_point(size = 4) +
# Centering title and using bold face
ggtitle("PCoA (wunifrac)") +
theme_classic(base_size = 12) +
theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
scale_color_manual(values = c("#73C474", "#157E3B", "#F89452", "red", "#DE402E", "#A50026","darkblue", "black"))
p_unweighted
ggsave("Figure01_PCoA_Weighted_UniFrac.pdf", height = 3.5, width = 5)
##Distance within stock CME
rm(list = ls())
ps <- readRDS("Figure01_CEP_Age_pseq.rds")
ps1 <- subset_samples(ps, Category == "Fresh")
pseq <- prune_taxa(taxa_sums(ps1) > 1, ps1)
# 2. Root the Tree
# UniFrac calculation absolutely requires a rooted tree
phy_tree(pseq) <- midpoint.root(phy_tree(pseq))
# 3. Transform to Proportional Abundance (Total Sum Scaling)
# This converts counts to percentages (e.g., 0.05 instead of 500 reads)
pseq_rel <- transform_sample_counts(pseq, function(x) x / sum(x))
# 4. Calculate Weighted UniFrac Distance
# We use the transformed object; weighted=TRUE accounts for the percentages
dist_wunifrac <- phyloseq::distance(pseq_rel, method = "unifrac", weighted = F)
# 5. Extract metadata and run PERMANOVA
metadata <- data.frame(sample_data(pseq_rel))
metadata$Day <- as.factor(metadata$Day)
set.seed(123) # For reproducible results
adonis_results <- adonis2(dist_wunifrac ~ Day,
data = metadata,
permutations = 999,
by = "margin")
# 6. Print results
print("--- PERMANOVA Results: Proportional Unweighted UniFrac ---")
## [1] "--- PERMANOVA Results: Proportional Unweighted UniFrac ---"
print(adonis_results)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_wunifrac ~ Day, data = metadata, permutations = 999, by = "margin")
## Df SumOfSqs R2 F Pr(>F)
## Day 2 0.022274 0.23123 0.9023 0.842
## Residual 6 0.074054 0.76877
## Total 8 0.096327 1.00000
##Distance within post 24hrs
rm(list = ls())
ps <- readRDS("Figure01_CEP_Age_pseq.rds")
ps1 <- subset_samples(ps, Category != "Fresh")
pseq <- prune_taxa(taxa_sums(ps1) > 1, ps1)
# 2. Root the Tree
# UniFrac calculation absolutely requires a rooted tree
phy_tree(pseq) <- midpoint.root(phy_tree(pseq))
# 3. Transform to Proportional Abundance (Total Sum Scaling)
# This converts counts to percentages (e.g., 0.05 instead of 500 reads)
pseq_rel <- transform_sample_counts(pseq, function(x) x / sum(x))
# 4. Calculate Weighted UniFrac Distance
# We use the transformed object; weighted=TRUE accounts for the percentages
dist_wunifrac <- phyloseq::distance(pseq_rel, method = "unifrac", weighted = F)
# 5. Extract metadata and run PERMANOVA
metadata <- data.frame(sample_data(pseq_rel))
metadata$Day <- as.factor(metadata$Day)
set.seed(123) # For reproducible results
adonis_results <- adonis2(dist_wunifrac ~ Day,
data = metadata,
permutations = 999,
by = "margin")
# 6. Print results
print("--- PERMANOVA Results: Proportional Unweighted UniFrac ---")
## [1] "--- PERMANOVA Results: Proportional Unweighted UniFrac ---"
print(adonis_results)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_wunifrac ~ Day, data = metadata, permutations = 999, by = "margin")
## Df SumOfSqs R2 F Pr(>F)
## Day 2 0.028049 0.26296 1.0703 0.317
## Residual 6 0.078619 0.73704
## Total 8 0.106668 1.00000
#Stack plots family level
rm(list = ls())
pseq <- readRDS("Figure01_CEP_Age_pseq.rds")
pseq
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6956 taxa and 18 samples ]
## sample_data() Sample Data: [ 18 samples by 7 sample variables ]
## tax_table() Taxonomy Table: [ 6956 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6956 tips and 6954 internal nodes ]
y1 <- tax_glom(pseq, taxrank = 'Family') # agglomerate taxa
y3 <- transform_sample_counts(y1, function(x) x/sum(x)) #get abundance in %
y4 <- psmelt(y3)
y4$Family <- as.character(y4$Family) #convert to character
y4$Family[y4$Abundance < 0.02] <- "Family < 2% abund." #rename genera with < 1% abundance
my_colors <- c("Family < 2% abund." = "lightgrey",
"Acetobacteraceae" = "#781156",
"Acholeplasmataceae" = "lightyellow",
"Alcaligenaceae" = "#FF00FF",
"Alcanivoracaceae1" = "#E43FAD",
"Alteromonadaceae" = "#EA6CC0",
"Azospirillaceae" = "#666600",
"Bacillaceae" = "#4B0082",
"Brevibacillaceae" = "#fb9a99",
"Brevibacteriaceae" = "#114578",
"Caulobacteraceae" = "#185EA5",
"Cellvibrionaceae" = "#1E78D2",
"Chitinophagaceae" = "#FFFF00",
"Clostridiaceae" = "#6CABEA",
"Comamonadaceae" = "#FFD700",
"Corynebacteriaceae" = "#117878",
"Crocinitomicaceae" = "#18A5A5",
"Cyclobacteriaceae" = "#3FE4E4",
"Cytophagaceae" = "#D1A33D",
"Dermabacteraceae" = "#666600",
"Devosiaceae" = "orange",
"Dietziaceae" = "lightcyan",
"Enterobacteriaceae" = "black",
"Flavobacteriaceae" = "#D1A33D",
"Flavobacteriaceae" = "#3FE491",
"Gemmatimonadaceae" = "darkorange1",
"JG30−KF−CM45" = "#8A2BE2",
"Kaistiaceae" = "#c51b7d",
"Lachnospiraceae" = "lightcoral",
"Lysobacteraceae" = "#98F0C4",
"Microbacteriaceae" = "#1E90FF",
"Micrococcaceae" = "orange",
"Moraxellaceae" = "#D2D21E",
"Myxococcaceae" = "#E4E43F",
"Nocardiaceae" = "#A5A518",
"Oxalobacteraceae" = "#F0F098",
"Paenibacillaceae" = "red",
"Pirellulaceae" = "#784511",
"Planococcaceae" = "#A55E18",
"Pseudomonadaceae" = "lightblue",
"Rhizobiaceae" = "brown",
"Rhizobiaceae" = "#E4913F",
"Rhodocyclaceae" = "#800080",
"Rhodocyclaceae" = "#EAAB6C",
"Rubinisphaeraceae" = "#1E90FF",
"Rubinisphaeraceae" = "#F0C498",
"Saprospiraceae" = "darkseagreen",
"Solirubrobacteraceae" = "#A5182F",
"Sphingobacteriaceae" = "#D21E2C",
"Spirosomaceae" = "#599861",
"Sporolactobacillaceae" = "purple",
"Staphylococcaceae" = "#D2781E",
"Thermoactinomycetaceae" = "#771155",
"Thermomonosporaceae" = "#AA4488",
"Verrucomicrobiaceae" = "#228B22",
"Xanthomonadaceae" = "darkolivegreen1",
"Arcobacteraceae" = "lightpink")
p <- ggplot(y4, aes(x = Replicate, y = Abundance, fill = fct_reorder(Family, Abundance))) +
geom_bar(stat = "identity",position="fill") + # to equal 1
theme_classic() + scale_fill_manual(values= my_colors) +
theme(text = element_text(size=12),axis.text.y = element_text(hjust=1)) +
scale_x_discrete(drop=TRUE) +theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
guides(fill = guide_legend( keywidth = 0.4, keyheight = .7)) +
ylab("Distribution of ASVs at Family level") + theme(axis.text = element_text(face="bold"))
p + theme(panel.grid.major = element_blank()) + theme(strip.text.x = element_text(size =12), legend.text = element_text(size=13)) + facet_grid(cols = vars(Category, Day)) +guides(fill=guide_legend(ncol=2))
ggsave("Figure01StackPlot.pdf", height = 5, width = 20)