#Figure 5A

rm(list=ls())
pseq0 <- readRDS("../input files/Phyloseq_Object_with_Tax.rds")  
meta_table <- read.csv("../input files/Metadata.csv", row.names=1, check.names=FALSE)  
sample_data(pseq0) <- meta_table  
pseq <- subset_samples(pseq0, 
                       Age != "Week3" & 
                       !Samples %in% c("DISP2-1", "AZPA2-2") & 
                       !Category %in% c("AZ2", "AZP2", "DIS2", "DISP2"))
div.all <- estimate_richness(pseq)
## Warning in estimate_richness(pseq): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
alpha.ps1 <- plot_richness(pseq, x="Category3", measures=c( "Observed","Shannon")) + geom_boxplot(aes(fill=Category3)) 
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
write.csv(div.all, "alphadiv.csv")
p1 <- alpha.ps1 + 
  geom_boxplot(alpha = 0.3) +  # Set transparency and outlier shape
  theme_classic() + 
  ggtitle("Bacterial Diversity in the gut of post-treatment groups") + 
  theme(plot.title = element_text(size = 10)) + 
  scale_fill_manual(values = c("darkgreen","red","#800000",  "purple", "blue")) + 
  theme(axis.title.x = element_blank(), 
        axis.text.x = element_blank(), 
        axis.ticks.x = element_blank(), 
        legend.position = "right")

print(p1)

ggsave("Figure5A.tiff", height = 6, width = 9)

#Figure 5B

pseq.rel.2 <- transform(pseq, "compositional")
ord_mds_bray1 <- ordinate(pseq.rel.2, method = "PCoA", distance = "bray")
beta.ps1 <- plot_ordination(pseq.rel.2, 
                            ord_mds_bray1, 
                            color="Category3") 
p9 <- beta.ps1 + ggtitle("PCoA based on bray distance") + 
  theme_classic() + 
  theme(
    plot.title = element_text(size = 12, face = "bold"),
    axis.title = element_text(size = 14),  # Increase axis title size
    axis.text = element_text(size = 12),   # Increase tick label size
    panel.border = element_rect(color = "black", fill = NA, size = 1)) + 
scale_color_manual(values = c("darkgreen","red","#800000",  "purple", "blue")) + 
  geom_point(size = 5)
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p9 +  stat_ellipse(linetype = "dashed", linewidth = 0.5) 

ggsave("Figure5B.tiff", height = 5, width = 7)
dist_bray <- phyloseq::distance(pseq.rel.2, method = "bray")
metadata <- as(sample_data(pseq.rel.2), "data.frame")
adonis2_res <- adonis2(dist_bray ~ Category3, data = metadata)
print(adonis2_res)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist_bray ~ Category3, data = metadata)
##          Df SumOfSqs      R2     F Pr(>F)    
## Model     4  1.02163 0.52343 5.217  0.001 ***
## Residual 19  0.93017 0.47657                 
## Total    23  1.95180 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Figure 5C

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:Biostrings':
## 
##     collapse, intersect, setdiff, setequal, union
## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect
## The following object is masked from 'package:XVector':
## 
##     slice
## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## [1] "1.NC2"    "2.HP2"    "3.AB2"    "7.DIS2"   "8.DISP2"  "9.DISPA2"

#Figure 5D

rm(list=ls())  
pseq <- readRDS("../input files/Phyloseq_Object_with_Tax.rds")  
meta_table <- read.csv("../input files/Metadata.csv", row.names=1, check.names=FALSE)  
sample_data(pseq) <- meta_table  

pseq <- subset_samples(pseq, 
                       Age != "Week3" & 
                       Samples != "DISP2-1" & 
                       !Category %in% c("DIS2", "DISP2", "DISPA2"))
pseq.rel.2 <- transform(pseq, "compositional")
sample_data(pseq.rel.2)$Category3 <- factor(sample_data(pseq.rel.2)$Category3)

# Check the levels of Category3 after conversion
levels(sample_data(pseq.rel.2)$Category3)
## [1] "1.NC2"   "2.HP2"   "3.AB2"   "4.AZ2"   "5.AZP2"  "6.AZPA2"
selected_samples <- pseq.rel.2 %>%
  subset_samples(Category3 %in% c("1.NC2", "2.HP2", "3.AB2", "4.AZ2", "5.AZP2" , "6.AZPA2"))

# Compute Bray-Curtis distance once for all selected samples
dist_bray <- distance(selected_samples, method = "bray")

# Convert the distance object to a matrix and then to a data frame
dist_bray_df <- as.data.frame(as.matrix(dist_bray))
dist_bray_df$Sample1 <- rownames(dist_bray_df)

# Reshape the distance matrix into long format
dist_long <- dist_bray_df %>%
  pivot_longer(cols = -Sample1, names_to = "Sample2", values_to = "Distance")
metadata <- data.frame(SampleID = sample_names(selected_samples),
                       Category3 = sample_data(selected_samples)$Category3)

dist_long <- dist_long %>%
  left_join(metadata, by = c("Sample1" = "SampleID")) %>%
  rename(Group1 = Category3) %>%
  left_join(metadata, by = c("Sample2" = "SampleID")) %>%
  rename(Group2 = Category3)

# Filter to keep only specified group comparisons
group_combinations <- data.frame(
  Group1 = c("1.NC2", "1.NC2", "1.NC2", "1.NC2",   "3.AB2", "3.AB2", "3.AB2"),
  Group2 = c( "3.AB2", "4.AZ2", "5.AZP2" , "6.AZPA2", "4.AZ2", "5.AZP2" , "6.AZPA2"))

#Keep only distances for the specified pairs
distance_data <- dist_long %>%
  semi_join(group_combinations, by = c("Group1", "Group2"))
# Plot Bray-Curtis distances
ggplot(distance_data, aes(x = interaction(Group1, Group2), y = Distance, fill = interaction(Group1, Group2))) +
  geom_boxplot() +
  labs(x = "Group Comparisons", y = "Bray-Curtis Distance", title = "Bray-Curtis Distance Between Group Comparisons") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_manual(values = c("#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD", "#8C564B", "#E377C2", "#7F7F7F"))

#Figure 5E

pseq <- prune_taxa(taxa_sums(pseq) > 10, pseq)
taxic <- as.data.frame(pseq@tax_table) 
taxic$OTU <- row.names(taxic)
taxmat <- as.matrix(taxic)
new.tax <- tax_table(taxmat)  
tax_table(pseq) <- new.tax 
pseq.phy <- aggregate_taxa(pseq, "Genus")
pseq.phy.rel<- transform(pseq.phy, "compositional")
ps.m <- psmelt(pseq.phy.rel)
desired_order <- c("1.NC", "2.HP", "3.AB", "6.AZPA", "9.DISPA") 
my_comparisons<- list( c("2.HP", "6.AZPA"), c("2.HP", "9.DISPA"), c("3.AB", "2.HP"), c("3.AB", "6.AZPA"), c("3.AB", "9.DISPA"),  c("1.NC", "2.HP"), c("1.NC", "6.AZPA"), c("1.NC", "9.DISPA"))

# List of bacteria
bacteria_list <- c("Akkermansia", "Bifidobacterium", "Lactobacillus", "Dubosiella", "Turicibacter", "Bacteroides", "Faecalibaculum", "Ruminococcus", "A2", "Lachnoclostridium", "Jeotgalicoccus")
ps.m$Groups <- factor(ps.m$Groups, levels = desired_order)
for (bacterium in bacteria_list) {
  
  # Subset data for the current bacterium
  ps.m2 <- subset(ps.m, Genus == bacterium)
  
  # Create the bar plot
  p <- ggplot(ps.m2, aes(x = Groups, y = Abundance, fill = Groups)) +
    geom_bar(stat = "summary", fun = "mean", alpha = 0.7) +  # Bar plot (mean abundance)
    geom_point(color = "black", position = position_jitter(width = 0.1, height = 0), 
               size = 2, shape = 1) + theme_classic() +
    scale_fill_manual(values = c("darkgreen","red","#800000",  "purple", "blue")) + stat_compare_means(comparisons = my_comparisons, method = "wilcox.test",  p.adjust.method = "BH", label = "p.signif")+ 
    facet_wrap(~Genus, scales = "free_y") +  # Wilcoxon test
    theme(strip.text = element_text(size = 10, face = "bold"),
          axis.title.x = element_blank(),
          axis.text.x = element_blank(),
          axis.title.y = element_text(size = 12, face = "bold"),
          axis.text.y = element_text(size = 12, face = "bold"),
          legend.position = "none")
  print(p)
  ggsave(paste0("AZ_Week4_withoutP value_", bacterium, ".tiff"), plot = p, height = 3, width = 3)
}
## Warning: Computation failed in `stat_signif()`.
## Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations

## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations

## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations

## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations

## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations

## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations

## Warning in wilcox.test.default(c(0.00889832786708963, 0.00201972894652109, :
## cannot compute exact p-value with ties
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations
## Warning in wilcox.test.default(c(0.00889832786708963, 0.00201972894652109, :
## cannot compute exact p-value with ties
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations

## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations

## Warning in wilcox.test.default(c(0.00611772970757837, 0.0055817947617003, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.00611772970757837, 0.0055817947617003, :
## cannot compute exact p-value with ties
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations
## Warning in wilcox.test.default(c(0.00611772970757837, 0.0055817947617003, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.00611772970757837, 0.0055817947617003, :
## cannot compute exact p-value with ties
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations

## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations

## Warning in wilcox.test.default(c(0.0234313672778106, 0.00280950910774931, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.0234313672778106, 0.00280950910774931, :
## cannot compute exact p-value with ties
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations
## Warning in wilcox.test.default(c(0.0234313672778106, 0.00280950910774931, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(c(0.0234313672778106, 0.00280950910774931, :
## cannot compute exact p-value with ties
## Warning: Computation failed in `stat_signif()`.
## Caused by error in `wilcox.test.default()`:
## ! not enough 'y' observations

#Supplementary Figures ##Stack Plots Genus Level

rm(list=ls())
pseq0<- readRDS("../input files/Phyloseq_Object_with_Tax.rds")
meta_table<-read.csv("../input files/Metadata.csv",row.names=1,check.names=FALSE)
sample_data(pseq0) <- meta_table
pseq <- subset_samples(pseq0, 
                       Age != "Week3" & 
                       Samples != "DISP2-1" & 
                       !Category %in% c("AZ2", "AZP2", "DIS2", "DISP2")) %>%
       prune_taxa(taxa_sums(.) > 1, .)
y1 <- tax_glom(pseq, taxrank = 'Genus') # agglomerate taxa
y3 <- transform_sample_counts(y1, function(x) x/sum(x)) #get abundance in %
y4 <- psmelt(y3) 
y4$Genus <- as.character(y4$Genus) #convert to character
y4$Genus[y4$Abundance < 0.02] <- "Genera < 2% abund." #rename genera with < 1% abundance
library(fantaxtic)
my_colors <- c("#B0C4DE","red","#66CDAA","#8B0000","#556B2F","#FFE4B5","black","#800080","#DC143C","#00FA9A","#0000CD","darkgoldenrod1","#D2691E","#2E8B57","#5F7FC7","#BA55D3","#D1A33D","#800000","#00CED1","#40E0D0","#CCCCFF","#2E8B57","#666600","darkred","#8A2BE2","#4B0082","#CD5C5C","darkorange1","green","#FF00FF","#4B0082","#B0E0E6","#8FBC8F","#7FFFD4","darkblue","#ADFF2F","#FFB6C1","#6A5ACD","#00008B","#B22222","#FF6347","#4682B4","#4169E1","#D8BFD8","#DA70D6","#7B68EE","#8A2BE2")

p <- ggplot(y4, aes(x = Replicates, y = Abundance, fill = fct_reorder(Genus, Abundance))) + 
  geom_bar(stat = "identity", position = "fill") +  # Stacked bar to equal 1
  theme_classic() +
  scale_fill_manual(values = my_colors) + 
  theme(
    text = element_text(size = 8),  # General text size
    axis.text.y = element_text(hjust = 1),  # Y-axis text alignment
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, face = "bold"),  # X-axis text with angle and bold
    axis.line.x = element_line(size = 1.5),  # Make x-axis line thicker
    strip.text.x = element_text(size = 4, face = "bold"),  # Larger facet labels
    panel.grid.major = element_blank(),  # Remove major grid lines
    legend.position = "right",  # Move legend to bottom
    legend.key.width = unit(0.5, "cm"),  # Adjust legend key width
    legend.key.height = unit(0.5, "cm"),  # Adjust legend key height
    legend.title = element_text(size = 4),  # Legend title size
    legend.text = element_text(size = 4)  # Legend text size
  ) + 
  scale_x_discrete(drop = TRUE) + 
  ylab("Distribution of ASVs at Genus level") + 
  guides(fill = guide_legend(keywidth = 0.1, keyheight = 1)) +  # Custom legend appearance
  facet_grid(cols = vars(Category3)) +  # Facet by Category3
  guides(fill = guide_legend(ncol = 1))  # Make legend into one column
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(p)

##Stack Plots Family Level

y1 <- tax_glom(pseq, taxrank = 'Family') # agglomerate taxa
y3 <- transform_sample_counts(y1, function(x) x/sum(x)) #get abundance in %
y4 <- psmelt(y3) 
y4$Family <- as.character(y4$Family) #convert to character
y4$Family[y4$Abundance < 0.02] <- "Genera < 2% abund." #rename genera with < 1% abundance
p <- ggplot(y4, aes(x = Replicates, y = Abundance, fill = fct_reorder(Family, Abundance))) + 
  geom_bar(stat = "identity", position = "fill") +  # Stacked bar to equal 1
  theme_classic() +
 scale_fill_manual(values = my_colors) + 
  theme(
    text = element_text(size = 8),  # General text size
    axis.text.y = element_text(hjust = 1),  # Y-axis text alignment
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, face = "bold"),  # X-axis text with angle and bold
    axis.line.x = element_line(size = 1.5),  # Make x-axis line thicker
    strip.text.x = element_text(size = 8, face = "bold"),  # Larger facet labels
    panel.grid.major = element_blank(),  # Remove major grid lines
    legend.position = "right",  # Move legend to bottom
    legend.key.width = unit(0.5, "cm"),  # Adjust legend key width
    legend.key.height = unit(0.5, "cm"),  # Adjust legend key height
    legend.title = element_text(size = 12),  # Legend title size
    legend.text = element_text(size = 12)  # Legend text size
  ) + 
  scale_x_discrete(drop = TRUE) + 
  ylab("Distribution of ASVs at Phylum level") + 
  guides(fill = guide_legend(keywidth = 0.1, keyheight = 1)) +  
  facet_grid(cols = vars(Category3)) +  # Facet by Category3
  guides(fill = guide_legend(ncol = 1)) 

print(p)