#Figure 01
rm(list=ls())
worm_data <- read_excel("Supplementary_Data.xlsx",
sheet = "Supplementary_Table_2", skip = 1)
# Keep only certain days
worm_data <- worm_data %>%
filter(Day %in% c("Day0", "Day03", "Day06", "Day09"))
#remove superscripts like a/b that was added for footnotes
worm_data <- worm_data %>%
mutate(
Total = gsub("[^0-9.]", "", Total),
Total = as.numeric(Total))
# Fix factor order
worm_data$Day <- factor(worm_data$Day,
levels = c("Day0", "Day03", "Day06", "Day09"))
# Summary
average_worms <- worm_data %>%
group_by(Day, CME) %>%
summarise(
Average_Worms = mean(Total, na.rm = TRUE),
SD_Worms = sd(Total, na.rm = TRUE),
.groups = 'drop')
# Plot
p <- ggplot(average_worms,
aes(x = Day, y = Average_Worms,
group = CME,
color = CME)) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = Average_Worms - SD_Worms,
ymax = Average_Worms + SD_Worms),
width = 0.2, size = 0.6, color = "gray30",
show.legend = FALSE) +
geom_line(size = 1.3) +
theme_classic(base_size = 12) +
labs(x = "Day",
y = "Average Worms Count",
title = "Average Worm Counts by Day") +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
axis.text.y = element_text(size = 7),
axis.title.x = element_text(size = 7),
axis.title.y = element_text(size = 7)
) +
facet_wrap(~ CME, scales = "free_y") +
scale_color_manual(values = c("ACME" = "black",
"BCME" = "black",
"PCME" = "black"))
ggsave("WormNumber.pdf", p, height = 3, width = 9, dpi = 300)
print(p)
rm(list=ls())
worm_data <- read_excel("Supplementary_Data.xlsx",
sheet = "Supplementary_Table_2", skip = 1, n_max = 72)
worm_long <- worm_data %>%
pivot_longer(
cols = c(Gravids_and_PostGravids, L1_L3, Dauers),
names_to = "Stage_of_worm",
values_to = "Count"
) %>%
mutate(Proportion = Count /`worm count in sample`)
summary_df <- worm_long %>%
group_by(Day, CME, Stage_of_worm) %>%
summarise(
Mean = mean(Proportion),
SD = sd(Proportion),
.groups = "drop"
) %>%
mutate(Experimental_Day = Day)
p <- ggplot(summary_df,
aes(x = Experimental_Day,
y = Mean,
color = Stage_of_worm,
group = Stage_of_worm)) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD),
width = 0.2, size = 0.6) +
geom_line(size = 1) +
facet_wrap(~ CME, scales = "free_y") +
labs(title = "Proportion of Worm Types Over Days",
x = "Day",
y = "Proportion ± SD") +
theme_classic2() +
scale_color_manual(values = c(
"Gravids_and_PostGravids" = "black",
"L1_L3" = "blue",
"Dauers" = "#e31a1c"
)) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
axis.text.y = element_text(size = 7),
axis.title.x = element_text(size = 7),
axis.title.y = element_text(size = 7))
print(p)
ggsave("WormProportion2.pdf", p, height = 3, width = 12, dpi = 300)
#Figure 2A
rm(list = ls())
pseq_all <- readRDS("Supplementary File 1.rds")
my_colors <- c("Family < 2% abund." = "grey", "Enterococcaceae" = "#1E90FF", "Pseudomonadaceae" = "darkred","Microbacteriaceae" = "#D1A33D","Enterobacteriaceae" = "#0000CD","Shewanellaceae" = "#6B8E23","Erwiniaceae" = "#32CD32","Clostridiaceae" = "darkgoldenrod1","Rhizobiaceae" = "#CD5C5C","Arcobacteraceae" = "red","Acetobacteraceae" = "#20B2AA","Comamonadaceae" = "#556B2F","Bacteroidaceae" = "#FF6347", "Chitinophagaceae" = "dodgerblue3", "Flavobacteriaceae" = "#FFE4B5", "Xanthomonadaceae" = "#20B2AA","Lactobacillaceae" = "#5F7FC7","Moraxellaceae" = "#00CED1","Dysgonomonadaceae" = "#00FA9A", "Paenibacillaceae" = "#666600","Kaistiaceae" = "#D2691E","Aeromonadaceae" = "#7FFFD4","Lachnospiraceae" = "#2E8B57","Weeksellaceae" = "#4B0082", "Tannerellaceae" = "#FFA07A","Azospirillaceae" = "#1E90FF", "Desulfovibrionaceae" = "#DC143C", "Terrimicrobiaceae" = "darkblue", "Sphingomonadaceae" = "#00FFFF", "Spirosomaceae" = "darkorange1","Sphingobacteriaceae" = "#00FFFF","Peptostreptococcaceae" = "#2E8B57", "Verrucomicrobiaceae" = "#FF00FF", "Xanthobacteriaceae" = "#FFB6C1","Cellvibrionaceae" = "black")
my_colors <- my_colors[!duplicated(names(my_colors), fromLast = TRUE)]
# --- CME list
CMEs <- c("ACME", "PCME", "BCME")
dfs <- list()
for (cme in CMEs) {
ps <- pseq_all %>%
subset_samples(CME == cme &
!Category %in% c("Control", "Control_Slice", "Grazed_Slice") &
Day != "Day-02")
if (any(sample_sums(ps) >= 20)) ps <- prune_samples(sample_sums(ps) >= 20, ps)
if (any(taxa_sums(ps) > 2)) ps <- prune_taxa(taxa_sums(ps) > 2, ps)
ps <- tax_glom(ps, taxrank = "Family")
ps <- transform_sample_counts(ps, function(x) x / sum(x))
df <- psmelt(ps)
df$Family <- as.character(df$Family)
df$Family[df$Abundance < 0.02] <- "Family < 2% abund."
df <- df %>%
mutate(Family = fct_reorder(Family, Abundance, .fun = sum, .desc = TRUE)) %>%
mutate(Family = fct_relevel(Family, "Family < 2% abund.", after = 0))
dfs[[cme]] <- df
}
make_plot <- function(df, y_label) {
ggplot(df, aes(x = Replicate, y = Abundance, fill = Family)) +
geom_bar(stat = "identity", position = "fill") +
facet_grid(rows = vars(Category), cols = vars(Days), scales = "free_x", space = "free_x") +
scale_fill_manual(values = my_colors, na.value = "grey") +
theme_classic() +
scale_x_discrete(drop = TRUE) +
guides(fill = guide_legend(ncol = 2, keywidth = 0.4, keyheight = 0.7)) +
ylab(y_label) +
theme(
text = element_text(size = 8),
axis.text = element_text(face = "bold"),
axis.text.y = element_text(size = 10, hjust = 1),
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_text(size = 10, face = "bold"),
axis.title.y = element_text(size = 8, face = "bold"),
strip.text = element_text(size = 8, face = "bold"),
legend.position = "right",
legend.text = element_text(size = 6),
legend.title = element_text(size = 8, face = "bold"),
plot.margin = unit(c(-1, 0, -1, 0), "lines")
)
}
plots <- imap(dfs, ~ make_plot(.x, .y))
combined <- wrap_plots(plotlist = plots, ncol = 1, guides = "collect") & theme(legend.position = "right")
ggsave("Figure2A.pdf", combined, width = 12, height = 9 * length(plots), units = "in")
print(combined)
#Figure 2B
rm(list = ls())
pseq1 <- readRDS("Supplementary File 1.rds")
pseq2 <- subset_samples(pseq1, Day != "Day-02" & !Category %in% c("Worm", "Control"))
pseq <- prune_taxa(taxa_sums(pseq2) > 200, pseq2)
#Get Shannon diversity data
richness_data <- plot_richness(pseq, measures = c("Shannon"))$data
#Step 1: Compute baseline from Day00 of Control_Lawn
baseline_value <- richness_data %>%
filter(Category == "Control_Lawn", Day == "Day00") %>%
summarise(mean_day00 = mean(value, na.rm = TRUE)) %>%
pull(mean_day00)
#Normalize all values by Control_Lawn Day00
richness_data <- richness_data %>%
mutate(delta_value = value / baseline_value)
#write.csv(richness_data, "allcmerichnes.csv")
#Compute summary statistics
summary_data <- richness_data %>%
group_by(Day, Category) %>%
summarise(
mean_delta = mean(delta_value, na.rm = TRUE),
sd_delta = sd(delta_value, na.rm = TRUE),
.groups = "drop"
)
#Manually add Day00 entry for Grazed_Lawn using Control_Lawn baseline
grazed_day00 <- data.frame(
Day = "Day00",
Category = "Grazed_Lawn",
mean_delta = 1, #normalized to Control_Lawn Day00
sd_delta = 0)
#Add it to summary_data
summary_data <- bind_rows(summary_data, grazed_day00)
#Ensure Day is treated as an ordered factor if needed
summary_data$Day <- factor(summary_data$Day,
levels = sort(unique(summary_data$Day)),
ordered = TRUE)
p <- ggplot(summary_data, aes(x = Day, y = mean_delta, group = Category)) +
geom_point(size = 2) +
geom_line(aes(linetype = Category), size = 1) +
geom_errorbar(aes(ymin = mean_delta - sd_delta, ymax = mean_delta + sd_delta),
width = 0.2, color = "gray30") +
scale_linetype_manual(values = c("solid", "longdash", "solid", "longdash")) +
theme_classic2() +
theme(
strip.text = element_text(size = 14, face = "bold"),
legend.position = "right")
print(p)
ggsave("LawnShannon_diversity_plot.pdf", plot = p, device = "pdf", width = 4, height = 3, dpi = 300)
#Figure 2C revised Unifrac
rm(list=ls())
pseq <- readRDS("PhyloSeqWithTree.rds")
pseq2 <- prune_taxa(taxa_sums(pseq) > 200, pseq)
days <- c("Day03", "Day06", "Day09", "Day12", "Day15", "Day19", "Day22")
cme_types <- c("ACME", "BCME", "PCME")
library(phytools)
## Loading required package: maps
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
##
## Attaching package: 'phytools'
## The following object is masked from 'package:scales':
##
## rescale
## The following object is masked from 'package:vegan':
##
## scores
# Midpoint root the tree
rooted_tree <- midpoint.root(phy_tree(pseq2))
phy_tree(pseq2) <- rooted_tree
all_distances_combined <- data.frame()
set.seed(123)
for (cme in cme_types) {
all_distances <- data.frame()
for (day in days) {
# Filter data for CME and day
pseq1 <- subset_samples(pseq2, CME == cme)
pseq0 <- subset_samples(pseq1, Day %in% day)
pseq.rel.2 <- transform_sample_counts(pseq0, function(x) x / sum(x))
set.seed(1234)
bc_dist <- phyloseq::distance(pseq.rel.2, method = "unifrac", weighted= TRUE)
sample_data <- as.data.frame(sample_data(pseq.rel.2))
sample_data$SampleID <- rownames(sample_data)
# Define the replicates for each category (Control_Lawn, Grazed_Lawn)
control_samples <- sample_data$SampleID[sample_data$Category == "Control_Lawn"]
grazed_samples <- sample_data$SampleID[sample_data$Category == "Grazed_Lawn"]
bc_dist_matrix <- as.matrix(bc_dist)
pairwise_distances <- c()
for (control in control_samples) {
for (grazed in grazed_samples) {
dist_value <- bc_dist_matrix[control, grazed]
pairwise_distances <- c(pairwise_distances, dist_value)}
}
stats <- data.frame(
Day = day,
CME = cme,
Comparison = "Control_Lawn vs Grazed_Lawn",
Pairwise_Distances = pairwise_distances)
all_distances <- bind_rows(all_distances, stats)
}
all_distances_combined <- bind_rows(all_distances_combined, all_distances)
}
average_across_days <- all_distances_combined %>%
group_by(Day) %>%
summarise(
mean = mean(Pairwise_Distances),
sd = sd(Pairwise_Distances),
.groups = "drop")
print(average_across_days)
## # A tibble: 7 × 3
## Day mean sd
## <chr> <dbl> <dbl>
## 1 Day03 0.102 0.0571
## 2 Day06 0.112 0.0494
## 3 Day09 0.171 0.0630
## 4 Day12 0.167 0.0447
## 5 Day15 0.189 0.0772
## 6 Day19 0.185 0.0459
## 7 Day22 0.209 0.0840
average_across_days$Day <- as.factor(average_across_days$Day)
p <- ggplot(average_across_days, aes(x = Day, y = mean, group = 1)) + geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.2, size = 0.8) +
geom_line(color = "black", size = 1.5) +
geom_point(color = "black", size = 3) +
theme_classic(base_size = 14) +
labs(x = "Day", y = "Mean Weighted Bray Distance") +
theme(legend.position = "none")
print(p)
ggsave("Bray_ControlVSGrazed.pdf", plot = p, device = "pdf", width = 4, height = 3, dpi = 300)
#Figure 2D
rm(list=ls())
pseq2 <- readRDS("PhyloSeqWithTree.rds")
pseq2 <- subset_samples(pseq2, Day %in% c("Day03", "Day06", "Day19", "Day22") )
pseq1 <- prune_taxa(taxa_sums(pseq2) > 200, pseq2)
phase_list <- c("Boom", "Bust")
cat_list <- c("Control_Lawn", "Grazed_Lawn")
# List to collect distance data frames
all_dists_153 <- list()
set.seed(1234)
for (phs in phase_list) {
for (cat in cat_list) {
message("Processing group: ", phs, " | ", cat)
# Subset samples for this Phase and Category
sdf <- as.data.frame(sample_data(pseq1))
keep_samples <- rownames(sdf[sdf$Phase == phs & sdf$Category == cat, ])
pseq_subset <- prune_samples(keep_samples, pseq1)
# Check sample count after pruning
n_samples <- nsamples(pseq_subset)
message("Samples retained after filtering: ", n_samples)
if (n_samples < 2) {
message("Not enough samples to calculate distances. Skipping group.")
next
}
# Compute weighted UniFrac distance matrix
dist_matrix <- phyloseq::distance(pseq_subset, method = "unifrac", weighted = TRUE)
dist_mat <- as.matrix(dist_matrix)
# Extract lower triangle (all pairwise distances without repeats/self)
dist_vals <- dist_mat[lower.tri(dist_mat)]
# Get sample pairs for each distance (optional, useful for later)
sample_names <- sample_names(pseq_subset)
pairs <- t(combn(sample_names, 2))
pairs_df <- as.data.frame(pairs)
colnames(pairs_df) <- c("Sample1", "Sample2")
# Build data frame of distances with metadata
df <- data.frame(
Sample1 = pairs_df$Sample1,
Sample2 = pairs_df$Sample2,
Distance = dist_vals,
Phase = phs,
Category = cat,
Group = paste(phs, cat, sep = "_")
)
all_dists_153[[paste(phs, cat, sep = "_")]] <- df
}
}
## Processing group: Boom | Control_Lawn
## Samples retained after filtering: 18
## Processing group: Boom | Grazed_Lawn
## Samples retained after filtering: 18
## Processing group: Bust | Control_Lawn
## Samples retained after filtering: 18
## Processing group: Bust | Grazed_Lawn
## Samples retained after filtering: 18
# Combine all groups into one data frame
df_all_153 <- bind_rows(all_dists_153)
# Add Group factor with desired levels for consistent ordering
df_all_153$Group <- factor(df_all_153$Group, levels = c(
"Boom_Control_Lawn",
"Bust_Control_Lawn",
"Boom_Grazed_Lawn",
"Bust_Grazed_Lawn"
))
# Add Color column based on Phase
df_all_153$Color <- ifelse(df_all_153$Phase == "Boom", "darkgreen", "darkred")
# Add Linetype column based on Category
df_all_153$Linetype <- ifelse(df_all_153$Category == "Grazed_Lawn", "dashed", "solid")
# Plot using your aesthetics and style
p_box <- ggplot(df_all_153, aes(x = Group, y = Distance)) +
geom_boxplot(aes(color = Phase, fill = Phase, linetype = Category),
outlier.shape = NA, alpha = 0.5, size = 1) +
geom_point(position = position_jitter(width = 0.1), shape = 1, size = 2) +
scale_fill_manual(values = c("Boom" = "darkgreen", "Bust" = "darkred")) +
scale_color_manual(values = c("Boom" = "darkgreen", "Bust" = "darkred")) +
scale_linetype_manual(values = c("Control_Lawn" = "solid", "Grazed_Lawn" = "dashed")) +
theme_classic() +
labs(title = "Pairwise Weighted UniFrac Distances",
x = "Group",
y = "Weighted UniFrac Distance") +
theme(
axis.text.x = element_text(angle = 0, hjust = 1, size = 8),
axis.text.y = element_text(size = 8),
axis.title = element_text(size = 10),
plot.title = element_text(size = 10, face = "bold"),
legend.position = "none"
)
print(p_box)
ggsave("Figure2_NewDADA2.pdf", plot = p_box, device = "pdf", width = 4, height = 4, dpi = 300)
#Figure 3 code available at https://rpubs.com/MicroRB/BoomBust2 #Figure4A_Revised unifrac
rm(list=ls())
pseq <- readRDS("PhyloSeqWithTree.rds")
pseq <- subset_samples(pseq, Day %in% c("Day00", "Day03", "Day06", "Day09","Day12", "Day15", "Day19", "Day22"))
pseq <- prune_taxa(taxa_sums(pseq) > 200, pseq)
set.seed(123)
pseq_rel <- transform(pseq, "compositional") # each sample sums to 1
set.seed(123)
ord_pcoa_rel <- ordinate(pseq_rel, method = "PCoA", distance = "unifrac", weighted = TRUE)
# Plot PCoA
pcoa_plot_rel <- plot_ordination(pseq_rel, ord_pcoa_rel, color = "Category") +
geom_point(size = 4) +
theme_classic2() +
scale_color_manual(values = c("black", "#33a02c", "#e31a1c")) +
labs(title = "PCoA (unifrac, Relative Abundance)")
pcoa_plot_rel
ggsave("WormUnifrac.pdf", plot = pcoa_plot_rel, device = "pdf", width = 6, height = 4, dpi = 300)
#Figure 4B
rm(list=ls())
pseq1 <- readRDS("Supplementary File 1.rds")
pseq <- subset_samples(pseq1, Category == "Worm")
pseq_family <- tax_glom(pseq, taxrank = "Family")
pseq_relabund <- transform_sample_counts(pseq_family, function(x) x / sum(x)) # relative abundance
otu_mat <- as(otu_table(pseq_relabund), "matrix")
if(taxa_are_rows(pseq_relabund)) otu_mat <- t(otu_mat)
tax_df <- as.data.frame(tax_table(pseq_relabund))
colnames(otu_mat) <- tax_df$Family
meta <- as.data.frame(sample_data(pseq_relabund))
meta$SampleID <- rownames(meta)
filtered_list <- lapply(unique(meta$Day), function(day) {
samples_day <- meta$SampleID[meta$Day == day]
otu_day <- otu_mat[samples_day, , drop=FALSE]
# Day-wise mean abundance filter
mean_abund_day <- colMeans(otu_day)
keep_families <- names(mean_abund_day[mean_abund_day >= 0.02])
otu_day <- otu_day[, keep_families, drop=FALSE]
# Day-wise prevalence filter (>=50% of samples)
prevalence <- colSums(otu_day > 0) / nrow(otu_day)
keep_families <- names(prevalence[prevalence >= 1])
otu_day <- otu_day[, keep_families, drop=FALSE]
# Convert to long format
df_long_day <- as.data.frame(otu_day)
df_long_day$SampleID <- rownames(df_long_day)
df_long_day <- df_long_day %>%
pivot_longer(-SampleID, names_to="Family", values_to="Abundance") %>%
left_join(meta, by="SampleID") %>%
group_by(Day, Family) %>%
summarise(MeanAbundance = mean(Abundance), nSamples = n(), .groups="drop")
return(df_long_day)
})
df_long <- bind_rows(filtered_list)
mat_day_mean <- df_long %>%
select(Day, Family, MeanAbundance) %>%
pivot_wider(names_from = Day, values_from = MeanAbundance, values_fill = 0) %>%
as.data.frame()
rownames(mat_day_mean) <- mat_day_mean$Family
mat_day_mean <- mat_day_mean[, c("Day03", "Day06", "Day09", "Day12", "Day15", "Day19")]
my_palette <- colorRampPalette(c( "lightyellow", "red"))
# Breaks: smooth transition from 0 to maximum abundance
max_val <- max(mat_day_mean, na.rm = TRUE)
breaks <- seq(0, max_val, length.out = 100)
my_colors <- my_palette(length(breaks) - 1)
p <- pheatmap(
mat_day_mean,
cluster_rows = TRUE,
cluster_cols = FALSE,
color = my_colors,
breaks = breaks,
main = " ",
border_color = NA,
cellwidth = 30,
cellheight = 10,
fontsize_row = 8,
fontsize_col = 10,
legend = TRUE
)
pdf("Figure4B.pdf", width = 8, height = 6)
print(p)
dev.off()
## quartz_off_screen
## 2
#Figure 4C
rm(list=ls())
pseq1 <- readRDS("Supplementary File 1.rds")
pseq <- subset_samples(pseq1, Category == "Worm")
richness_data <- plot_richness(pseq, x = "Replicate", measures = c("Shannon"))$data
averaged_data <- richness_data %>%
group_by(Day, Category) %>%
dplyr::summarize(
avg_value = mean(value, na.rm = TRUE),
sd_value = sd(value, na.rm = TRUE))
## `summarise()` has grouped output by 'Day'. You can override using the `.groups`
## argument.
p <- ggplot(averaged_data, aes(x = Day, y = avg_value, group = Category, linetype = Category)) +
geom_point(size = 2, color = "red") + #Plot points
geom_line(size = 1.5, color = "red") + #Plot lines instead of smooth
geom_errorbar(aes(ymin = avg_value - sd_value, ymax = avg_value + sd_value),
width = 0.2, size = 0.6, color = "red", show.legend = FALSE) + #Error bars for SD
scale_linetype_manual(values = c("solid", "dashed")) + #Custom line styles
theme_classic2() + theme(
axis.text.y = element_text(size = 14),
strip.text = element_text(size = 14, face = "bold")
)
#Print the plot
print(p)
ggsave("Figure4C.pdf", plot = p, device = "pdf", width = 6, height = 4, dpi = 300)
#Figure 4D
rm(list = ls())
pseq2 <- readRDS("PhyloSeqWithTree.rds")
days <- c("Day03", "Day06", "Day09", "Day12", "Day15", "Day19")
# Subset once for relevant CME types
pseq <- subset_samples(pseq2, CME %in% c("ACME", "PCME", "BCME"))
pseq1 <- prune_samples(sample_sums(pseq) >= 200, pseq)
# Initialize result storage
all_distances_list <- list()
for (current_day in days) {
pseq_current <- subset_samples(pseq1, Day == current_day)
pseq_current <- prune_samples(sample_sums(pseq_current) > 1, pseq_current)
pseq_rel <- transform_sample_counts(pseq_current, function(x) x / sum(x))
bc_dist <- phyloseq::distance(pseq_rel, method = "unifrac")
sample_df <- as.data.frame(sample_data(pseq_rel))
sample_df$SampleID <- rownames(sample_df)
group1 <- sample_df$SampleID[sample_df$Category == "Worm"]
dist_mat <- as.matrix(bc_dist)[group1, group1]
dist_values <- dist_mat[lower.tri(dist_mat)]
dist_values <- dist_values[!is.na(dist_values)]
if (length(dist_values) > 0) {
# store all distances (not the mean)
temp_df <- data.frame(
Day = current_day,
Category = "Worm",
Distance = dist_values
)
all_distances_list[[length(all_distances_list) + 1]] <- temp_df
}
}
# combine into one data frame
all_distances <- do.call(rbind, all_distances_list)
library(dplyr)
summary_data <- all_distances %>%
group_by(Day, Category) %>%
summarise(
MeanDistance = mean(Distance),
SD = sd(Distance),
.groups = "drop"
)
plot_data <- summary_data
plot_data$Day <- factor(plot_data$Day, levels = days)
custom_colors <- c("Worm" = "#e31a1c")
p_line <- ggplot(plot_data, aes(x = Day, y = MeanDistance,
color = Category, group = Category)) +
geom_point(size = 3) +
geom_line(size = 1) +
geom_errorbar(aes(ymin = MeanDistance - SD, ymax = MeanDistance + SD),
width = 0.2, size = 0.8) +
scale_color_manual(values = custom_colors) +
theme_classic(base_size = 14) +
labs(x = "Day", y = "Mean unifrac Dist ± SD", color = "Category") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5))
print(p_line)
ggsave("Figure4D.pdf", plot = p_line, device = "pdf",
width = 6, height = 4, dpi = 300)
#Figure 4E
rm(list=ls())
pseq <- readRDS("PhyloSeqWithTree.rds")
# Define days and CME groups
days <- c("Day03","Day06","Day09","Day12","Day15","Day19")
cme_list <- c("ACME","BCME","PCME")
# Filter samples by minimum depth
pseq <- prune_samples(sample_sums(pseq) >= 200, pseq)
# Weighted UniFrac function
get_wUF <- function(ps) phyloseq::distance(ps, method="unifrac", weighted=TRUE)
all_distances <- list()
set.seed(1234)
for (cme in cme_list) {
for (day in days) {
ps_sub <- subset_samples(pseq, CME == cme & Day == day)
if (nsamples(ps_sub) < 2) next
meta <- as.data.frame(sample_data(ps_sub))
worms <- meta$SampleID[meta$Category == "Worm"]
graz <- meta$SampleID[meta$Category == "Grazed_Lawn"]
if (length(worms) == 0 | length(graz) == 0) next
# Weighted UniFrac
dist_mat <- as.matrix(get_wUF(ps_sub))
pw_vals <- unlist(lapply(worms, function(w) dist_mat[w, graz]))
all_distances[[paste(cme,day,sep="_")]] <- data.frame(
Day = day,
Distance = pw_vals
)
}
}
# Combine distances
df <- bind_rows(all_distances)
write.csv(df, "DistanceUnifrac.csv")
# Order days
df$Day <- factor(df$Day, levels = days)
# Summarize across CME groups
df_summary <- df %>%
group_by(Day) %>%
summarise(mean = mean(Distance),
sd = sd(Distance),
.groups = "drop")
print(df_summary)
## # A tibble: 6 × 3
## Day mean sd
## <fct> <dbl> <dbl>
## 1 Day03 0.197 0.152
## 2 Day06 0.125 0.0404
## 3 Day09 0.219 0.111
## 4 Day12 0.256 0.106
## 5 Day15 0.205 0.0774
## 6 Day19 0.211 0.0379
# Plot ONE line
p <- ggplot(df_summary, aes(x = Day, y = mean, group = 1)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.2, size = 0.8) +
geom_line(size = 1.5, color = "black") +
geom_point(size = 3, color = "black") +
theme_classic(base_size = 14) +
labs(x = "Day",
y = "Weighted UniFrac Distance (Worm vs Grazed) ± SD") +
theme(legend.position = "none")
print(p)
ggsave("Figure4E.pdf",
plot = p, device = "pdf", width = 6, height = 4, dpi = 300)
#Supplementary Figure 2A
y4$Family[y4$Abundance < 0.02] <- "Family < 2% abund." #rename genera with < 1% abundance
my_colors <- c("Family < 2% abund." = "grey","Pseudomonadaceae" = "darkred","Arcobacteraceae" = "red","Enterobacteriaceae" = "#0000CD","Shewanellaceae" = "#6B8E23","Moraxellaceae" = "#00CED1","Aeromonadaceae" = "#7FFFD4","Acetobacteraceae" = "#20B2AA","Tannerellaceae" = "#FFA07A","Comamonadaceae" = "#556B2F","Rhizobiaceae" = "#CD5C5C","Flavobacteriaceae" = "#FFE4B5","Clostridiaceae" = "darkgoldenrod1","Xanthomonadaceae" = "#20B2AA", "Bacteroidaceae" = "#FF6347", "Sphingobacteriaceae" = "#00FFFF","Peptostreptococcaceae" = "#2E8B57", "Sphingomonadaceae" = "#00FFFF", "Weeksellaceae" = "#4B0082", "Enterococcaceae" = "#1E90FF","Microbacteriaceae" = "#D1A33D","Erwiniaceae" = "#32CD32")
y4 <- y4 %>%
mutate(Family = fct_reorder(Family, Abundance, .fun = sum, .desc = TRUE)) %>%
mutate(Family = fct_relevel(Family, "Family < 2% abund.", after = Inf))
p <- ggplot(y4, aes(x = Replicate, y = Abundance, fill = Family)) +
geom_bar(stat = "identity", position = "fill") +
theme_classic() +
scale_fill_manual(values = my_colors) +
theme(
text = element_text(size = 12),
axis.text.y = element_text(hjust = 1),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
panel.grid.major = element_blank(),
strip.text.x = element_text(size = 12),
legend.text = element_text(size = 13),
axis.text = element_text(face = "bold")
) +
scale_x_discrete(drop = TRUE) +
guides(fill = guide_legend(keywidth = 0.4, keyheight = 0.7, ncol = 1)) +
ylab("Distribution of ASVs at Family level") +
facet_grid(cols = vars(Day), rows = vars(CME)) +
guides(fill = guide_legend(ncol = 1))
print(p)
# Save the plot as a high-quality .pdf
ggsave("Supplementary_Figure2A.pdf", plot = p, device = "pdf", width = 8, height = 6, dpi = 300)
#Supplementary Figure 2B
## $Day00
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_matrix ~ CME, data = sample_data_df)
## Df SumOfSqs R2 F Pr(>F)
## Model 2 1.55875 0.86183 18.712 0.004 **
## Residual 6 0.24991 0.13817
## Total 8 1.80866 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`Day-02`
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_matrix ~ CME, data = sample_data_df)
## Df SumOfSqs R2 F Pr(>F)
## Model 2 1.15631 0.92848 38.947 0.004 **
## Residual 6 0.08907 0.07152
## Total 8 1.24538 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Supplementary Figure 4
#Supplementary Figure 3 left panel
rm(list=ls())
pseq <- readRDS("Supplementary File 1.rds")
pseq1 <- subset_samples(pseq, Day %in% c( "Day03", "Day06", "Day09","Day12", "Day15", "Day19", "Day22"))
pseq <- subset_samples(pseq1, Category == "Control_Lawn")
pseq <- prune_taxa(taxa_sums(pseq) > 200, pseq)
set.seed(123)
pseq_rel <- transform(pseq, "compositional") # each sample sums to 1
cme_colors <- c("greenyellow", "chartreuse2", "chartreuse3", "springgreen3", "yellowgreen",
"olivedrab", "darkolivegreen", "cyan", "deepskyblue","dodgerblue3",
"royalblue3", "blue", "mediumblue","darkblue", "bisque", "#f1a1a1", "lightcoral",
"tomato","brown3", "firebrick3", "darkred","#660000")
shape_values <- c(1) # Category shapes
set.seed(123)
ord_pcoa_rel <- ordinate(pseq_rel, method = "PCoA", distance = "bray", weighted = TRUE)
# Plot PCoA
pcoa_plot_rel <- plot_ordination(pseq_rel, ord_pcoa_rel, color = "CMEDays", shape = "Category") +
geom_point(size = 4) +
theme_classic2() +
scale_color_manual(values = cme_colors) +
scale_shape_manual(values = c(16)) +
labs(title = "PCoA Control Lawn")
print(pcoa_plot_rel)
ggsave("Control_Lawn_AllCME.pdf", plot = pcoa_plot_rel, device = "pdf", width = 7, height = 4, dpi = 300)
#Supplementary Figure 3 right panel
rm(list=ls())
pseq <- readRDS("Supplementary File 1.rds")
pseq1 <- subset_samples(pseq, Day %in% c( "Day03", "Day06", "Day09","Day12", "Day15", "Day19", "Day22"))
pseq <- subset_samples(pseq1, Category == "Grazed_Lawn")
pseq <- prune_taxa(taxa_sums(pseq) > 200, pseq)
set.seed(123)
pseq_rel <- transform(pseq, "compositional") # each sample sums to 1
cme_colors <- c("greenyellow", "chartreuse2", "chartreuse3", "springgreen3", "yellowgreen",
"olivedrab", "darkolivegreen", "cyan", "deepskyblue","dodgerblue3",
"royalblue3", "blue", "mediumblue","darkblue", "bisque", "#f1a1a1", "lightcoral",
"tomato","brown3", "firebrick3", "darkred","#660000")
shape_values <- c(1) # Category shapes
set.seed(123)
ord_pcoa_rel <- ordinate(pseq_rel, method = "PCoA", distance = "bray", weighted = TRUE)
# Plot PCoA
pcoa_plot_rel <- plot_ordination(pseq_rel, ord_pcoa_rel, color = "CMEDays", shape = "Category") +
geom_point(size = 4) +
theme_classic2() +
scale_color_manual(values = cme_colors) +
scale_shape_manual(values = c(16)) +
labs(title = "PCoA Grazed Lawn")
print(pcoa_plot_rel)
ggsave("Grazed_Lawn_AllCME.pdf", plot = pcoa_plot_rel, device = "pdf", width = 7, height = 4, dpi = 300)