Caenorhabditis elegans populations shape their microbial environment

#Figure 01

rm(list=ls())

worm_data <- read_excel("Supplementary_Data.xlsx",
                        sheet = "Supplementary_Table_2", skip = 1)
# Keep only certain days
worm_data <- worm_data %>%
  filter(Day %in% c("Day0", "Day03", "Day06", "Day09"))

#remove superscripts like a/b that was added for footnotes
worm_data <- worm_data %>%
  mutate(
    Total = gsub("[^0-9.]", "", Total),
    Total = as.numeric(Total))

# Fix factor order
worm_data$Day <- factor(worm_data$Day,
                        levels = c("Day0", "Day03", "Day06", "Day09"))

# Summary
average_worms <- worm_data %>%
  group_by(Day, CME) %>%
  summarise(
    Average_Worms = mean(Total, na.rm = TRUE),
    SD_Worms = sd(Total, na.rm = TRUE),
    .groups = 'drop')

# Plot
p <- ggplot(average_worms,
            aes(x = Day, y = Average_Worms,
                group = CME,
                color = CME)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = Average_Worms - SD_Worms,
                    ymax = Average_Worms + SD_Worms),
                width = 0.2, size = 0.6, color = "gray30",
                show.legend = FALSE) +
  geom_line(size = 1.3) +
  theme_classic(base_size = 12) +
  labs(x = "Day",
       y = "Average Worms Count",
       title = "Average Worm Counts by Day") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    axis.text.y = element_text(size = 7),
    axis.title.x = element_text(size = 7),
    axis.title.y = element_text(size = 7)
  ) +
  facet_wrap(~ CME, scales = "free_y") +
  scale_color_manual(values = c("ACME" = "black",
                                "BCME" = "black",
                                "PCME" = "black"))
ggsave("WormNumber.pdf", p, height = 3, width = 9, dpi = 300)
print(p)

rm(list=ls())
worm_data <- read_excel("Supplementary_Data.xlsx",
                        sheet = "Supplementary_Table_2", skip = 1, n_max = 72)
worm_long <- worm_data %>%
  pivot_longer(
    cols = c(Gravids_and_PostGravids,   L1_L3,  Dauers),
    names_to = "Stage_of_worm",
    values_to = "Count"
  ) %>%
  mutate(Proportion = Count /`worm count in sample`)

summary_df <- worm_long %>%
  group_by(Day, CME, Stage_of_worm) %>%
  summarise(
    Mean = mean(Proportion),
    SD = sd(Proportion),
    .groups = "drop"
  ) %>%
  mutate(Experimental_Day = Day)

p <- ggplot(summary_df,
            aes(x = Experimental_Day,
                y = Mean,
                color = Stage_of_worm,
                group = Stage_of_worm)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD),
                width = 0.2, size = 0.6) +
  geom_line(size = 1) +
  facet_wrap(~ CME, scales = "free_y") +
  labs(title = "Proportion of Worm Types Over Days",
       x = "Day",
       y = "Proportion ± SD") +
  theme_classic2() +
  scale_color_manual(values = c(
    "Gravids_and_PostGravids" = "black",
    "L1_L3" =  "blue",
    "Dauers" =  "#e31a1c"
  )) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    axis.text.y = element_text(size = 7),
    axis.title.x = element_text(size = 7),
    axis.title.y = element_text(size = 7))

print(p)

ggsave("WormProportion2.pdf", p, height = 3, width = 12, dpi = 300)

#Figure 2A

rm(list = ls())
pseq_all <- readRDS("Supplementary File 1.rds")
my_colors <- c("Family < 2% abund." = "grey", "Enterococcaceae" = "#1E90FF", "Pseudomonadaceae" =  "darkred","Microbacteriaceae" = "#D1A33D","Enterobacteriaceae" = "#0000CD","Shewanellaceae" = "#6B8E23","Erwiniaceae" = "#32CD32","Clostridiaceae" = "darkgoldenrod1","Rhizobiaceae" = "#CD5C5C","Arcobacteraceae" = "red","Acetobacteraceae" = "#20B2AA","Comamonadaceae" = "#556B2F","Bacteroidaceae" = "#FF6347", "Chitinophagaceae" = "dodgerblue3", "Flavobacteriaceae"  = "#FFE4B5",  "Xanthomonadaceae" = "#20B2AA","Lactobacillaceae" = "#5F7FC7","Moraxellaceae" = "#00CED1","Dysgonomonadaceae" = "#00FA9A",  "Paenibacillaceae" = "#666600","Kaistiaceae" = "#D2691E","Aeromonadaceae" = "#7FFFD4","Lachnospiraceae" = "#2E8B57","Weeksellaceae" = "#4B0082", "Tannerellaceae" = "#FFA07A","Azospirillaceae" = "#1E90FF", "Desulfovibrionaceae" = "#DC143C", "Terrimicrobiaceae" = "darkblue", "Sphingomonadaceae" = "#00FFFF",  "Spirosomaceae" = "darkorange1","Sphingobacteriaceae" = "#00FFFF","Peptostreptococcaceae" = "#2E8B57", "Verrucomicrobiaceae" = "#FF00FF", "Xanthobacteriaceae" = "#FFB6C1","Cellvibrionaceae" = "black")

my_colors <- my_colors[!duplicated(names(my_colors), fromLast = TRUE)]

# --- CME list
CMEs <- c("ACME", "PCME", "BCME")
dfs <- list()

for (cme in CMEs) {
  ps <- pseq_all %>%
    subset_samples(CME == cme &
                   !Category %in% c("Control", "Control_Slice", "Grazed_Slice") &
                   Day != "Day-02")

  if (any(sample_sums(ps) >= 20)) ps <- prune_samples(sample_sums(ps) >= 20, ps)
  if (any(taxa_sums(ps) > 2))     ps <- prune_taxa(taxa_sums(ps) > 2, ps)

  ps <- tax_glom(ps, taxrank = "Family")
  ps <- transform_sample_counts(ps, function(x) x / sum(x))
  df <- psmelt(ps)
  df$Family <- as.character(df$Family)
  df$Family[df$Abundance < 0.02] <- "Family < 2% abund."
  df <- df %>%
    mutate(Family = fct_reorder(Family, Abundance, .fun = sum, .desc = TRUE)) %>%
    mutate(Family = fct_relevel(Family, "Family < 2% abund.", after = 0))

  dfs[[cme]] <- df
}

make_plot <- function(df, y_label) {
  ggplot(df, aes(x = Replicate, y = Abundance, fill = Family)) +
    geom_bar(stat = "identity", position = "fill") +
    facet_grid(rows = vars(Category), cols = vars(Days), scales = "free_x", space = "free_x") +
    scale_fill_manual(values = my_colors, na.value = "grey") +
    theme_classic() +
    scale_x_discrete(drop = TRUE) +
    guides(fill = guide_legend(ncol = 2, keywidth = 0.4, keyheight = 0.7)) +
    ylab(y_label) +
    theme(
      text = element_text(size = 8),
      axis.text = element_text(face = "bold"),
      axis.text.y = element_text(size = 10, hjust = 1),
      axis.ticks.x = element_blank(),
      axis.text.x = element_blank(),
      axis.title.x = element_text(size = 10, face = "bold"),
      axis.title.y = element_text(size = 8, face = "bold"),
      strip.text = element_text(size = 8, face = "bold"),
      legend.position = "right",
      legend.text = element_text(size = 6),
      legend.title = element_text(size = 8, face = "bold"),
      plot.margin = unit(c(-1, 0, -1, 0), "lines")
    )
}

plots <- imap(dfs, ~ make_plot(.x, .y))
combined <- wrap_plots(plotlist = plots, ncol = 1, guides = "collect") & theme(legend.position = "right")
ggsave("Figure2A.pdf", combined, width = 12, height = 9 * length(plots), units = "in")
print(combined)

#Figure 2B

rm(list = ls())
pseq1 <- readRDS("Supplementary File 1.rds")
pseq2 <- subset_samples(pseq1, Day != "Day-02" & !Category %in% c("Worm", "Control"))
pseq <- prune_taxa(taxa_sums(pseq2) > 200, pseq2)

#Get Shannon diversity data
richness_data <- plot_richness(pseq, measures = c("Shannon"))$data

#Step 1: Compute baseline from Day00 of Control_Lawn
baseline_value <- richness_data %>%
  filter(Category == "Control_Lawn", Day == "Day00") %>%
  summarise(mean_day00 = mean(value, na.rm = TRUE)) %>%
  pull(mean_day00)

#Normalize all values by Control_Lawn Day00
richness_data <- richness_data %>%
  mutate(delta_value = value / baseline_value)

#write.csv(richness_data, "allcmerichnes.csv")
#Compute summary statistics
summary_data <- richness_data %>%
  group_by(Day, Category) %>%
  summarise(
    mean_delta = mean(delta_value, na.rm = TRUE),
    sd_delta = sd(delta_value, na.rm = TRUE),
    .groups = "drop"
  )

#Manually add Day00 entry for Grazed_Lawn using Control_Lawn baseline
grazed_day00 <- data.frame(
  Day = "Day00",
  Category = "Grazed_Lawn",
  mean_delta = 1,  #normalized to Control_Lawn Day00
  sd_delta = 0)
#Add it to summary_data
summary_data <- bind_rows(summary_data, grazed_day00)

#Ensure Day is treated as an ordered factor if needed
summary_data$Day <- factor(summary_data$Day,
                           levels = sort(unique(summary_data$Day)),
                           ordered = TRUE)

p <- ggplot(summary_data, aes(x = Day, y = mean_delta, group = Category)) +
  geom_point(size = 2) +
  geom_line(aes(linetype = Category), size = 1) +
  geom_errorbar(aes(ymin = mean_delta - sd_delta, ymax = mean_delta + sd_delta),
                width = 0.2, color = "gray30") +
  scale_linetype_manual(values = c("solid", "longdash", "solid", "longdash")) +
  theme_classic2() +
  theme(
    strip.text = element_text(size = 14, face = "bold"),
    legend.position = "right")
print(p)

ggsave("LawnShannon_diversity_plot.pdf", plot = p, device = "pdf", width = 4, height = 3, dpi = 300)

#Figure 2C revised Unifrac

rm(list=ls())
pseq <- readRDS("PhyloSeqWithTree.rds")
pseq2 <- prune_taxa(taxa_sums(pseq) > 200, pseq)
days <- c("Day03", "Day06", "Day09", "Day12", "Day15", "Day19", "Day22")
cme_types <- c("ACME", "BCME", "PCME")
library(phytools)

## Loading required package: maps

## 
## Attaching package: 'maps'

## The following object is masked from 'package:purrr':
## 
##     map

## 
## Attaching package: 'phytools'

## The following object is masked from 'package:scales':
## 
##     rescale

## The following object is masked from 'package:vegan':
## 
##     scores

# Midpoint root the tree
rooted_tree <- midpoint.root(phy_tree(pseq2))
phy_tree(pseq2) <- rooted_tree
all_distances_combined <- data.frame()
set.seed(123)
for (cme in cme_types) {
  all_distances <- data.frame()  
  for (day in days) {
    # Filter data for CME and day
    pseq1 <- subset_samples(pseq2, CME == cme)
    pseq0 <- subset_samples(pseq1, Day %in% day)
    pseq.rel.2 <- transform_sample_counts(pseq0, function(x) x / sum(x)) 
    set.seed(1234)
    bc_dist <- phyloseq::distance(pseq.rel.2, method = "unifrac", weighted= TRUE)
    
    sample_data <- as.data.frame(sample_data(pseq.rel.2))
    sample_data$SampleID <- rownames(sample_data)
    # Define the replicates for each category (Control_Lawn, Grazed_Lawn)
    control_samples <- sample_data$SampleID[sample_data$Category == "Control_Lawn"]
    grazed_samples <- sample_data$SampleID[sample_data$Category == "Grazed_Lawn"]
    bc_dist_matrix <- as.matrix(bc_dist)
  
    pairwise_distances <- c()
    for (control in control_samples) {
      for (grazed in grazed_samples) {
        dist_value <- bc_dist_matrix[control, grazed]
        pairwise_distances <- c(pairwise_distances, dist_value)}
    }
    
    stats <- data.frame(
      Day = day,
      CME = cme,
      Comparison = "Control_Lawn vs Grazed_Lawn",
      Pairwise_Distances = pairwise_distances)

    all_distances <- bind_rows(all_distances, stats)
  }
  all_distances_combined <- bind_rows(all_distances_combined, all_distances)
}

average_across_days <- all_distances_combined %>%
  group_by(Day) %>%
  summarise(
    mean = mean(Pairwise_Distances),
    sd = sd(Pairwise_Distances),
    .groups = "drop")
print(average_across_days)

## # A tibble: 7 × 3
##   Day    mean     sd
##   <chr> <dbl>  <dbl>
## 1 Day03 0.102 0.0571
## 2 Day06 0.112 0.0494
## 3 Day09 0.171 0.0630
## 4 Day12 0.167 0.0447
## 5 Day15 0.189 0.0772
## 6 Day19 0.185 0.0459
## 7 Day22 0.209 0.0840

average_across_days$Day <- as.factor(average_across_days$Day)

p <- ggplot(average_across_days, aes(x = Day, y = mean, group = 1)) + geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.2, size = 0.8) +  
     geom_line(color = "black", size = 1.5) + 
     geom_point(color = "black", size = 3) +   
     theme_classic(base_size = 14) + 
     labs(x = "Day", y = "Mean Weighted Bray Distance") + 
     theme(legend.position = "none")
print(p)

ggsave("Bray_ControlVSGrazed.pdf", plot = p, device = "pdf", width = 4, height = 3, dpi = 300)

#Figure 2D

rm(list=ls())
pseq2 <- readRDS("PhyloSeqWithTree.rds")
pseq2 <- subset_samples(pseq2, Day %in% c("Day03", "Day06", "Day19", "Day22") )
pseq1 <- prune_taxa(taxa_sums(pseq2) > 200, pseq2)

phase_list <- c("Boom", "Bust")
cat_list <- c("Control_Lawn", "Grazed_Lawn")

# List to collect distance data frames
all_dists_153 <- list()
set.seed(1234)
for (phs in phase_list) {
  for (cat in cat_list) {
    message("Processing group: ", phs, " | ", cat)
    
    # Subset samples for this Phase and Category
    sdf <- as.data.frame(sample_data(pseq1))
    keep_samples <- rownames(sdf[sdf$Phase == phs & sdf$Category == cat, ])
    pseq_subset <- prune_samples(keep_samples, pseq1)
    
    # Check sample count after pruning
    n_samples <- nsamples(pseq_subset)
    message("Samples retained after filtering: ", n_samples)
    if (n_samples < 2) {
      message("Not enough samples to calculate distances. Skipping group.")
      next
    }
    
    # Compute weighted UniFrac distance matrix
    dist_matrix <- phyloseq::distance(pseq_subset, method = "unifrac", weighted = TRUE)
    dist_mat <- as.matrix(dist_matrix)
    
    # Extract lower triangle (all pairwise distances without repeats/self)
    dist_vals <- dist_mat[lower.tri(dist_mat)]
    
    # Get sample pairs for each distance (optional, useful for later)
    sample_names <- sample_names(pseq_subset)
    pairs <- t(combn(sample_names, 2))
    pairs_df <- as.data.frame(pairs)
    colnames(pairs_df) <- c("Sample1", "Sample2")
    
    # Build data frame of distances with metadata
    df <- data.frame(
      Sample1 = pairs_df$Sample1,
      Sample2 = pairs_df$Sample2,
      Distance = dist_vals,
      Phase = phs,
      Category = cat,
      Group = paste(phs, cat, sep = "_")
    )
    
    all_dists_153[[paste(phs, cat, sep = "_")]] <- df
  }
}

## Processing group: Boom | Control_Lawn

## Samples retained after filtering: 18

## Processing group: Boom | Grazed_Lawn

## Samples retained after filtering: 18

## Processing group: Bust | Control_Lawn

## Samples retained after filtering: 18

## Processing group: Bust | Grazed_Lawn

## Samples retained after filtering: 18

# Combine all groups into one data frame
df_all_153 <- bind_rows(all_dists_153)

# Add Group factor with desired levels for consistent ordering
df_all_153$Group <- factor(df_all_153$Group, levels = c(
  "Boom_Control_Lawn",
  "Bust_Control_Lawn",
  "Boom_Grazed_Lawn",
  "Bust_Grazed_Lawn"
))

# Add Color column based on Phase
df_all_153$Color <- ifelse(df_all_153$Phase == "Boom", "darkgreen", "darkred")

# Add Linetype column based on Category
df_all_153$Linetype <- ifelse(df_all_153$Category == "Grazed_Lawn", "dashed", "solid")

# Plot using your aesthetics and style
p_box <- ggplot(df_all_153, aes(x = Group, y = Distance)) +
  geom_boxplot(aes(color = Phase, fill = Phase, linetype = Category),
               outlier.shape = NA, alpha = 0.5, size = 1) +
  geom_point(position = position_jitter(width = 0.1), shape = 1, size = 2) +
  scale_fill_manual(values = c("Boom" = "darkgreen", "Bust" = "darkred")) +
  scale_color_manual(values = c("Boom" = "darkgreen", "Bust" = "darkred")) +
  scale_linetype_manual(values = c("Control_Lawn" = "solid", "Grazed_Lawn" = "dashed")) +
  theme_classic() +
  labs(title = "Pairwise Weighted UniFrac Distances",
       x = "Group",
       y = "Weighted UniFrac Distance") +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 1, size = 8),
    axis.text.y = element_text(size = 8),
    axis.title = element_text(size = 10),
    plot.title = element_text(size = 10, face = "bold"),
    legend.position = "none"
  )

print(p_box)

ggsave("Figure2_NewDADA2.pdf", plot = p_box, device = "pdf", width = 4, height = 4, dpi = 300)

#Figure 3 code available at https://rpubs.com/MicroRB/BoomBust2 #Figure4A_Revised unifrac

rm(list=ls())
pseq <- readRDS("PhyloSeqWithTree.rds")
pseq <- subset_samples(pseq, Day %in% c("Day00", "Day03", "Day06", "Day09","Day12",  "Day15", "Day19", "Day22"))

pseq <- prune_taxa(taxa_sums(pseq) > 200, pseq)
set.seed(123)
pseq_rel <- transform(pseq, "compositional")  # each sample sums to 1


set.seed(123)
ord_pcoa_rel <- ordinate(pseq_rel, method = "PCoA", distance = "unifrac", weighted = TRUE)

# Plot PCoA
pcoa_plot_rel <- plot_ordination(pseq_rel, ord_pcoa_rel, color = "Category") +
  geom_point(size = 4) +
  theme_classic2() +
   scale_color_manual(values = c("black", "#33a02c", "#e31a1c")) +
  labs(title = "PCoA (unifrac, Relative Abundance)")
pcoa_plot_rel

ggsave("WormUnifrac.pdf", plot = pcoa_plot_rel, device = "pdf", width = 6, height = 4, dpi = 300)

#Figure 4B

rm(list=ls())
pseq1 <- readRDS("Supplementary File 1.rds")
pseq <- subset_samples(pseq1, Category == "Worm")
pseq_family <- tax_glom(pseq, taxrank = "Family")
pseq_relabund <- transform_sample_counts(pseq_family, function(x) x / sum(x))  # relative abundance

otu_mat <- as(otu_table(pseq_relabund), "matrix")
if(taxa_are_rows(pseq_relabund)) otu_mat <- t(otu_mat)

tax_df <- as.data.frame(tax_table(pseq_relabund))
colnames(otu_mat) <- tax_df$Family

meta <- as.data.frame(sample_data(pseq_relabund))
meta$SampleID <- rownames(meta)

filtered_list <- lapply(unique(meta$Day), function(day) {
  
  samples_day <- meta$SampleID[meta$Day == day]
  otu_day <- otu_mat[samples_day, , drop=FALSE]
  
  # Day-wise mean abundance filter
  mean_abund_day <- colMeans(otu_day)
  keep_families <- names(mean_abund_day[mean_abund_day >= 0.02])
  otu_day <- otu_day[, keep_families, drop=FALSE]
  
  # Day-wise prevalence filter (>=50% of samples)
  prevalence <- colSums(otu_day > 0) / nrow(otu_day)
  keep_families <- names(prevalence[prevalence >= 1])
  otu_day <- otu_day[, keep_families, drop=FALSE]
  
  # Convert to long format
  df_long_day <- as.data.frame(otu_day)
  df_long_day$SampleID <- rownames(df_long_day)
  df_long_day <- df_long_day %>%
    pivot_longer(-SampleID, names_to="Family", values_to="Abundance") %>%
    left_join(meta, by="SampleID") %>%
    group_by(Day, Family) %>%
    summarise(MeanAbundance = mean(Abundance), nSamples = n(), .groups="drop")
  
  return(df_long_day)
})

df_long <- bind_rows(filtered_list)

mat_day_mean <- df_long %>%
  select(Day, Family, MeanAbundance) %>%
  pivot_wider(names_from = Day, values_from = MeanAbundance, values_fill = 0) %>%
  as.data.frame()

rownames(mat_day_mean) <- mat_day_mean$Family
mat_day_mean <- mat_day_mean[, c("Day03", "Day06", "Day09", "Day12", "Day15", "Day19")]

my_palette <- colorRampPalette(c( "lightyellow", "red"))

# Breaks: smooth transition from 0 to maximum abundance
max_val <- max(mat_day_mean, na.rm = TRUE)
breaks <- seq(0, max_val, length.out = 100)

my_colors <- my_palette(length(breaks) - 1)
p <- pheatmap(
  mat_day_mean,
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  color = my_colors,
  breaks = breaks,
  main = " ",
  border_color = NA,
  cellwidth = 30,
  cellheight = 10,
  fontsize_row = 8,
  fontsize_col = 10,
  legend = TRUE
)

pdf("Figure4B.pdf", width = 8, height = 6)
print(p)
dev.off()

## quartz_off_screen 
##                 2

#Figure 4C

rm(list=ls()) 
pseq1 <- readRDS("Supplementary File 1.rds") 
pseq <- subset_samples(pseq1, Category == "Worm")  
richness_data <- plot_richness(pseq, x = "Replicate", measures = c("Shannon"))$data  
averaged_data <- richness_data %>%
  group_by(Day, Category) %>%
  dplyr::summarize(
    avg_value = mean(value, na.rm = TRUE),
    sd_value = sd(value, na.rm = TRUE))

## `summarise()` has grouped output by 'Day'. You can override using the `.groups`
## argument.

p <- ggplot(averaged_data, aes(x = Day, y = avg_value, group = Category, linetype = Category)) +
  geom_point(size = 2, color = "red") +  #Plot points
  geom_line(size = 1.5, color = "red") +  #Plot lines instead of smooth
  geom_errorbar(aes(ymin = avg_value - sd_value, ymax = avg_value + sd_value), 
                width = 0.2, size = 0.6, color = "red", show.legend = FALSE) +  #Error bars for SD
  scale_linetype_manual(values = c("solid", "dashed")) +  #Custom line styles
  theme_classic2() + theme(
    axis.text.y = element_text(size = 14),
    strip.text = element_text(size = 14, face = "bold")
  )

#Print the plot
print(p)

ggsave("Figure4C.pdf", plot = p, device = "pdf", width = 6, height = 4, dpi = 300)

#Figure 4D

rm(list = ls())
pseq2 <- readRDS("PhyloSeqWithTree.rds")
days <- c("Day03", "Day06", "Day09", "Day12", "Day15", "Day19")

# Subset once for relevant CME types
pseq <- subset_samples(pseq2, CME %in% c("ACME", "PCME", "BCME"))

pseq1 <- prune_samples(sample_sums(pseq) >= 200, pseq)

# Initialize result storage
all_distances_list <- list()

for (current_day in days) {
  pseq_current <- subset_samples(pseq1, Day == current_day)
  pseq_current <- prune_samples(sample_sums(pseq_current) > 1, pseq_current)
  
  pseq_rel <- transform_sample_counts(pseq_current, function(x) x / sum(x))
  bc_dist <- phyloseq::distance(pseq_rel, method = "unifrac")
  
  sample_df <- as.data.frame(sample_data(pseq_rel))
  sample_df$SampleID <- rownames(sample_df)
  
  group1 <- sample_df$SampleID[sample_df$Category == "Worm"]
  dist_mat <- as.matrix(bc_dist)[group1, group1]
  dist_values <- dist_mat[lower.tri(dist_mat)]
  dist_values <- dist_values[!is.na(dist_values)]
 
  if (length(dist_values) > 0) {
    # store all distances (not the mean)
    temp_df <- data.frame(
      Day = current_day,
      Category = "Worm",
      Distance = dist_values
    )
    all_distances_list[[length(all_distances_list) + 1]] <- temp_df
  }
}

# combine into one data frame
all_distances <- do.call(rbind, all_distances_list)

library(dplyr)

summary_data <- all_distances %>%
  group_by(Day, Category) %>%
  summarise(
    MeanDistance = mean(Distance),
    SD = sd(Distance),
    .groups = "drop"
  )

plot_data <- summary_data
plot_data$Day <- factor(plot_data$Day, levels = days)

custom_colors <- c("Worm" = "#e31a1c")

p_line <- ggplot(plot_data, aes(x = Day, y = MeanDistance, 
                                color = Category, group = Category)) +
  geom_point(size = 3) +
  geom_line(size = 1) +
  geom_errorbar(aes(ymin = MeanDistance - SD, ymax = MeanDistance + SD),
                width = 0.2, size = 0.8) +
  scale_color_manual(values = custom_colors) +
  theme_classic(base_size = 14) +
  labs(x = "Day", y = "Mean unifrac Dist ± SD", color = "Category") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        plot.title = element_text(hjust = 0.5))

print(p_line)

ggsave("Figure4D.pdf", plot = p_line, device = "pdf",
       width = 6, height = 4, dpi = 300)

#Figure 4E

rm(list=ls())
pseq <- readRDS("PhyloSeqWithTree.rds")

# Define days and CME groups
days     <- c("Day03","Day06","Day09","Day12","Day15","Day19")
cme_list <- c("ACME","BCME","PCME")

# Filter samples by minimum depth
pseq <- prune_samples(sample_sums(pseq) >= 200, pseq)

# Weighted UniFrac function
get_wUF <- function(ps) phyloseq::distance(ps, method="unifrac", weighted=TRUE)

all_distances <- list()

set.seed(1234)
for (cme in cme_list) {
  for (day in days) {
    
    ps_sub <- subset_samples(pseq, CME == cme & Day == day)
    if (nsamples(ps_sub) < 2) next
    
    meta <- as.data.frame(sample_data(ps_sub))
    worms <- meta$SampleID[meta$Category == "Worm"]
    graz  <- meta$SampleID[meta$Category == "Grazed_Lawn"]
    if (length(worms) == 0 | length(graz) == 0) next
    
    # Weighted UniFrac
    dist_mat <- as.matrix(get_wUF(ps_sub))
    
    pw_vals <- unlist(lapply(worms, function(w) dist_mat[w, graz]))
    
    all_distances[[paste(cme,day,sep="_")]] <- data.frame(
      Day  = day,
      Distance = pw_vals
    )
  }
}

# Combine distances
df <- bind_rows(all_distances)
write.csv(df, "DistanceUnifrac.csv")


# Order days
df$Day <- factor(df$Day, levels = days)

# Summarize across CME groups
df_summary <- df %>%
  group_by(Day) %>%
  summarise(mean = mean(Distance),
            sd   = sd(Distance),
            .groups = "drop")

print(df_summary)

## # A tibble: 6 × 3
##   Day    mean     sd
##   <fct> <dbl>  <dbl>
## 1 Day03 0.197 0.152 
## 2 Day06 0.125 0.0404
## 3 Day09 0.219 0.111 
## 4 Day12 0.256 0.106 
## 5 Day15 0.205 0.0774
## 6 Day19 0.211 0.0379

# Plot ONE line
p <- ggplot(df_summary, aes(x = Day, y = mean, group = 1)) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.2, size = 0.8) +
  geom_line(size = 1.5, color = "black") +
  geom_point(size = 3, color = "black") +
  theme_classic(base_size = 14) +
  labs(x = "Day",
       y = "Weighted UniFrac Distance (Worm vs Grazed) ± SD") +
  theme(legend.position = "none")

print(p)

ggsave("Figure4E.pdf",
       plot = p, device = "pdf", width = 6, height = 4, dpi = 300)

#Supplementary Figure 2A

y4$Family[y4$Abundance < 0.02] <- "Family < 2% abund." #rename genera with < 1% abundance
my_colors <- c("Family < 2% abund." = "grey","Pseudomonadaceae" =  "darkred","Arcobacteraceae" = "red","Enterobacteriaceae" = "#0000CD","Shewanellaceae" = "#6B8E23","Moraxellaceae" = "#00CED1","Aeromonadaceae" = "#7FFFD4","Acetobacteraceae" = "#20B2AA","Tannerellaceae" = "#FFA07A","Comamonadaceae" = "#556B2F","Rhizobiaceae" = "#CD5C5C","Flavobacteriaceae"  = "#FFE4B5","Clostridiaceae" = "darkgoldenrod1","Xanthomonadaceae" = "#20B2AA",  "Bacteroidaceae" = "#FF6347", "Sphingobacteriaceae" = "#00FFFF","Peptostreptococcaceae" = "#2E8B57", "Sphingomonadaceae" = "#00FFFF", "Weeksellaceae" = "#4B0082",  "Enterococcaceae" = "#1E90FF","Microbacteriaceae" = "#D1A33D","Erwiniaceae" = "#32CD32")
y4 <- y4 %>% 
  mutate(Family = fct_reorder(Family, Abundance, .fun = sum, .desc = TRUE)) %>% 
  mutate(Family = fct_relevel(Family, "Family < 2% abund.", after = Inf)) 

p <- ggplot(y4, aes(x = Replicate, y = Abundance, fill = Family)) +
  geom_bar(stat = "identity", position = "fill") + 
  theme_classic() + 
  scale_fill_manual(values = my_colors) +
  theme(
    text = element_text(size = 12),
    axis.text.y = element_text(hjust = 1),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    panel.grid.major = element_blank(),
    strip.text.x = element_text(size = 12),
    legend.text = element_text(size = 13),
    axis.text = element_text(face = "bold")
  ) + 
  scale_x_discrete(drop = TRUE) + 
  guides(fill = guide_legend(keywidth = 0.4, keyheight = 0.7, ncol = 1)) + 
  ylab("Distribution of ASVs at Family level") + 
  facet_grid(cols = vars(Day), rows = vars(CME)) + 
  guides(fill = guide_legend(ncol = 1))  
print(p)

# Save the plot as a high-quality .pdf
ggsave("Supplementary_Figure2A.pdf", plot = p, device = "pdf", width = 8, height = 6, dpi = 300)

#Supplementary Figure 2B

## $Day00
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_matrix ~ CME, data = sample_data_df)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     2  1.55875 0.86183 18.712  0.004 **
## Residual  6  0.24991 0.13817                 
## Total     8  1.80866 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Day-02`
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_matrix ~ CME, data = sample_data_df)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     2  1.15631 0.92848 38.947  0.004 **
## Residual  6  0.08907 0.07152                 
## Total     8  1.24538 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Supplementary Figure 4 #Supplementary Figure 3 left panel

rm(list=ls())
pseq <- readRDS("Supplementary File 1.rds")
pseq1 <- subset_samples(pseq, Day %in% c( "Day03", "Day06", "Day09","Day12",  "Day15", "Day19", "Day22"))
pseq <- subset_samples(pseq1, Category == "Control_Lawn")
pseq <- prune_taxa(taxa_sums(pseq) > 200, pseq)
set.seed(123)
pseq_rel <- transform(pseq, "compositional")  # each sample sums to 1

cme_colors <- c("greenyellow", "chartreuse2", "chartreuse3",  "springgreen3", "yellowgreen",
                "olivedrab", "darkolivegreen", "cyan", "deepskyblue","dodgerblue3", 
                "royalblue3", "blue", "mediumblue","darkblue", "bisque", "#f1a1a1", "lightcoral",
                "tomato","brown3", "firebrick3", "darkred","#660000")  
shape_values <- c(1)  # Category shapes

set.seed(123)
ord_pcoa_rel <- ordinate(pseq_rel, method = "PCoA", distance = "bray", weighted = TRUE)

# Plot PCoA
pcoa_plot_rel <- plot_ordination(pseq_rel, ord_pcoa_rel, color = "CMEDays", shape = "Category") +
  geom_point(size = 4) +
  theme_classic2() +
  scale_color_manual(values = cme_colors) +
  scale_shape_manual(values = c(16)) +
  labs(title = "PCoA Control Lawn")

print(pcoa_plot_rel)

ggsave("Control_Lawn_AllCME.pdf", plot = pcoa_plot_rel, device = "pdf", width = 7, height = 4, dpi = 300)

#Supplementary Figure 3 right panel

rm(list=ls())
pseq <- readRDS("Supplementary File 1.rds")
pseq1 <- subset_samples(pseq, Day %in% c( "Day03", "Day06", "Day09","Day12",  "Day15", "Day19", "Day22"))
pseq <- subset_samples(pseq1, Category == "Grazed_Lawn")
pseq <- prune_taxa(taxa_sums(pseq) > 200, pseq)
set.seed(123)
pseq_rel <- transform(pseq, "compositional")  # each sample sums to 1

cme_colors <- c("greenyellow", "chartreuse2", "chartreuse3",  "springgreen3", "yellowgreen",
                "olivedrab", "darkolivegreen", "cyan", "deepskyblue","dodgerblue3", 
                "royalblue3", "blue", "mediumblue","darkblue", "bisque", "#f1a1a1", "lightcoral",
                "tomato","brown3", "firebrick3", "darkred","#660000")  
shape_values <- c(1)  # Category shapes

set.seed(123)
ord_pcoa_rel <- ordinate(pseq_rel, method = "PCoA", distance = "bray", weighted = TRUE)

# Plot PCoA
pcoa_plot_rel <- plot_ordination(pseq_rel, ord_pcoa_rel, color = "CMEDays", shape = "Category") +
  geom_point(size = 4) +
  theme_classic2() +
  scale_color_manual(values = cme_colors) +
  scale_shape_manual(values = c(16)) +
  labs(title = "PCoA Grazed Lawn")

print(pcoa_plot_rel)

ggsave("Grazed_Lawn_AllCME.pdf", plot = pcoa_plot_rel, device = "pdf", width = 7, height = 4, dpi = 300)

Caenorhabditis elegans populations shape their microbial environment

Rahul Bodkhe PhD

2025-11-15