Core Configuration & Libraries

library(dada2)
library(phyloseq)
library(DECIPHER)
library(phangorn)
library(biomformat)
library(ggplot2)
library(vegan)
library(ape)
library(writexl)
library(DESeq2)
library(dplyr)
library(DT)
library(microbiome)
library(Biostrings)
library(ggpubr)
library(indicspecies)
library(betapart)
library(igraph)
library(Hmisc)
library(reshape2)
library(ggrepel)

# ---- CORE CONFIGURATION ----
path <- "C:/Users/Owner/Documents/BSP_2022/BSP_2025/Metabarcoding_training"
cutadapt_path <- "C:/Users/Owner/Downloads/cutadapt" # Verify this matches your system installation
system2(cutadapt_path, args = "--version") # Run shell commands from R

# Standard Universal Primers (To be tested via Cutadapt)
fwd_16S <- "CCTACGGGNGGCWGCAG"        # V3 Forward
rev_16S <- "GACTACHVGGGTATCTAATCC"   # V4 Reverse
fwd_ITS <- "GCATCGATGAAGAACGCAGC"    # ITS3 Forward
rev_ITS <- "TCCTCCGCTTATTGATATGC"    # ITS4 Reverse

PHASE 1: BACTERIAL 16S PIPELINE

cat("\n\n======================================================\n")
cat("STARTING 16S PIPELINE - WATCH CUTADAPT RETENTION RATES\n")
cat("======================================================\n\n")

fnFs_16S <- sort(list.files(path, pattern = "__1_1\\.fastq\\.gz$", full.names = TRUE))
fnRs_16S <- sort(list.files(path, pattern = "__1_2\\.fastq\\.gz$", full.names = TRUE))
sample.names_16S <- sapply(strsplit(basename(fnFs_16S), "__1"), `[`, 1)

path_cut_16S <- file.path(path, "cutadapt_16S")
dir.create(path_cut_16S, showWarnings = FALSE)
fnFs_cut_16S <- file.path(path_cut_16S, paste0(sample.names_16S, "_F_cut.fastq.gz"))
fnRs_cut_16S <- file.path(path_cut_16S, paste0(sample.names_16S, "_R_cut.fastq.gz"))

# Cutadapt Trimming
for(i in seq_along(fnFs_16S)) {
  system2(cutadapt_path, args = c("-g", fwd_16S, "-G", rev_16S,
                                  "-o", shQuote(fnFs_cut_16S[i]), "-p", shQuote(fnRs_cut_16S[i]),
                                  shQuote(fnFs_16S[i]), shQuote(fnRs_16S[i]),
                                  "--discard-untrimmed", "--minimum-length=100"))
}

# DADA2 Filtering
filtFs_16S <- file.path(path, "filtered_16S", paste0(sample.names_16S, "_F_filt.fastq.gz"))
filtRs_16S <- file.path(path, "filtered_16S", paste0(sample.names_16S, "_R_filt.fastq.gz"))
names(filtFs_16S) <- sample.names_16S; names(filtRs_16S) <- sample.names_16S

out_16S <- filterAndTrim(fnFs_cut_16S, filtFs_16S, fnRs_cut_16S, filtRs_16S, 
                         truncLen=c(250, 200), maxN=0, maxEE=c(2,2), truncQ=2, 
                         rm.phix=TRUE, compress=TRUE, multithread=TRUE)
# 16S Quality Control Plots
print(plotQualityProfile(fnFs_16S[1:2]) + ggtitle("RAW Forward Reads (16S)"))

print(plotQualityProfile(filtFs_16S[1:2]) + ggtitle("FILTERED Forward Reads (16S)"))

# Error Learning & Core Algorithm
errF_16S <- learnErrors(filtFs_16S, multithread=TRUE)
errR_16S <- learnErrors(filtRs_16S, multithread=TRUE)
mergers_16S <- mergePairs(dada(filtFs_16S, err=errF_16S, multithread=TRUE), filtFs_16S, 
                          dada(filtRs_16S, err=errR_16S, multithread=TRUE), filtRs_16S, verbose=TRUE)
seqtab_nochim_16S <- removeBimeraDenovo(rowsum(makeSequenceTable(mergers_16S), group = sample.names_16S), method="consensus", multithread=TRUE)

# Taxonomy (SILVA)
seqtab_final_16S <- seqtab_nochim_16S[, nchar(getSequences(seqtab_nochim_16S)) >= 350]
taxa_16S <- assignTaxonomy(seqtab_final_16S, file.path(path, "silva_nr99_v138.2_toGenus_trainset.fa.gz"), multithread=TRUE)
taxa_16S <- addSpecies(taxa_16S, file.path(path, "silva_v138.2_assignSpecies.fa.gz"))

PHASE 2: FUNGAL / YEAST ITS PIPELINE

cat("\n\n======================================================\n")
cat("STARTING ITS PIPELINE - WATCH CUTADAPT RETENTION RATES\n")
cat("======================================================\n\n")

fnFs_ITS <- sort(list.files(path, pattern = "__2_1\\.fastq\\.gz$", full.names = TRUE))
fnRs_ITS <- sort(list.files(path, pattern = "__2_2\\.fastq\\.gz$", full.names = TRUE))
sample.names_ITS <- sapply(strsplit(basename(fnFs_ITS), "__2"), `[`, 1)

path_cut_ITS <- file.path(path, "cutadapt_ITS")
dir.create(path_cut_ITS, showWarnings = FALSE)
fnFs_cut_ITS <- file.path(path_cut_ITS, paste0(sample.names_ITS, "_F_cut.fastq.gz"))
fnRs_cut_ITS <- file.path(path_cut_ITS, paste0(sample.names_ITS, "_R_cut.fastq.gz"))

# Cutadapt Trimming
for(i in seq_along(fnFs_ITS)) {
  system2(cutadapt_path, args = c("-g", fwd_ITS, "-G", rev_ITS,
                                  "-o", shQuote(fnFs_cut_ITS[i]), "-p", shQuote(fnRs_cut_ITS[i]),
                                  shQuote(fnFs_ITS[i]), shQuote(fnRs_ITS[i]),
                                  "--discard-untrimmed", "--minimum-length=50"))
}

# DADA2 Filtering
filtFs_ITS <- file.path(path, "filtered_ITS", paste0(sample.names_ITS, "_F_filt.fastq.gz"))
filtRs_ITS <- file.path(path, "filtered_ITS", paste0(sample.names_ITS, "_R_filt.fastq.gz"))
names(filtFs_ITS) <- sample.names_ITS; names(filtRs_ITS) <- sample.names_ITS

out_ITS <- filterAndTrim(fnFs_cut_ITS, filtFs_ITS, fnRs_cut_ITS, filtRs_ITS, 
                         maxN=0, maxEE=c(2,2), truncQ=2, minLen=50, 
                         rm.phix=TRUE, compress=TRUE, multithread=TRUE)
# ITS Quality Control Plots
print(plotQualityProfile(fnFs_ITS[1:2]) + ggtitle("RAW Forward Reads (ITS)"))

print(plotQualityProfile(filtFs_ITS[1:2]) + ggtitle("FILTERED Forward Reads (ITS)"))

# Error Learning & Core Algorithm
errF_ITS <- learnErrors(filtFs_ITS, multithread=TRUE)
errR_ITS <- learnErrors(filtRs_ITS, multithread=TRUE)
mergers_ITS <- mergePairs(dada(filtFs_ITS, err=errF_ITS, multithread=TRUE), filtFs_ITS, 
                          dada(filtRs_ITS, err=errR_ITS, multithread=TRUE), filtRs_ITS, verbose=TRUE)
seqtab_final_ITS <- removeBimeraDenovo(rowsum(makeSequenceTable(mergers_ITS), group = sample.names_ITS), method="consensus", multithread=TRUE)

# Taxonomy (UNITE - Single Threaded for Windows Stability)
taxa_ITS <- assignTaxonomy(seqtab_final_ITS, file.path(path, "sh_general_release_dynamic_19.02.2025.fasta"), multithread=FALSE)

PHASE 3: PHYLOGENETIC TREES & PHYLOSEQ ASSEMBLY

cat("\n--- Building Phylogenetic Trees & Phyloseq Objects ---\n")
## 
## --- Building Phylogenetic Trees & Phyloseq Objects ---
metadata <- data.frame(
  SampleID = rownames(seqtab_final_16S),
  TimePoint = factor(ifelse(grepl("^0", rownames(seqtab_final_16S)), "Start", "Finish"), levels=c("Start", "Finish")),
  Treatment = factor(ifelse(grepl("CET", rownames(seqtab_final_16S)), "Controlled", "Spontaneous"))
)
rownames(metadata) <- metadata$SampleID

# 16S Assembly (Includes ML Tree)
generate_ml_tree <- function(seqtab_matrix) {
  seqs <- getSequences(seqtab_matrix); names(seqs) <- seqs
  phang_align <- phyDat(as(AlignSeqs(DNAStringSet(seqs), anchor=NA, verbose=FALSE), "matrix"), type="DNA")
  return(optim.pml(pml(NJ(dist.ml(phang_align)), data=phang_align), model="GTR", optInv=TRUE, optGamma=TRUE, rearrangement="stochastic", control=pml.control(trace=0))$tree)
}

ps_16S <- phyloseq(otu_table(seqtab_final_16S, taxa_are_rows=FALSE), sample_data(metadata), 
                   tax_table(taxa_16S), phy_tree(generate_ml_tree(seqtab_final_16S)))
dna_16S <- Biostrings::DNAStringSet(taxa_names(ps_16S)); names(dna_16S) <- taxa_names(ps_16S)
ps_16S <- merge_phyloseq(ps_16S, dna_16S)
taxa_names(ps_16S) <- paste0("ASV_16S_", seq(ntaxa(ps_16S)))

# Filter Host Contaminants
ps_16S <- subset_taxa(ps_16S, (Order != "Chloroplast") | is.na(Order))
ps_16S <- subset_taxa(ps_16S, (Family != "Mitochondria") | is.na(Family))

# ITS Assembly (No Tree)
ps_ITS <- phyloseq(otu_table(seqtab_final_ITS, taxa_are_rows=FALSE), sample_data(metadata), tax_table(taxa_ITS))
dna_ITS <- Biostrings::DNAStringSet(taxa_names(ps_ITS)); names(dna_ITS) <- taxa_names(ps_ITS)
ps_ITS <- merge_phyloseq(ps_ITS, dna_ITS)
taxa_names(ps_ITS) <- paste0("ASV_ITS_", seq(ntaxa(ps_ITS)))

# Overwrite the broken .rds files with the fixed, sequence-containing objects
saveRDS(ps_16S, file = file.path(path, "Saved_R_Data", "ps_16S_final.rds"))
saveRDS(ps_ITS, file = file.path(path, "Saved_R_Data", "ps_ITS_final.rds"))

cat("\nPhyloseq objects successfully rebuilt with embedded DNA sequences.\n")
## 
## Phyloseq objects successfully rebuilt with embedded DNA sequences.

5. Bacterial Community Analysis (16S)

5.1 16S Setup & Background Exports

# Clean and Transform
ps_clean_16S <- prune_samples(sample_sums(ps_16S) > 1000, ps_16S)
ps_rel_16S <- transform_sample_counts(ps_clean_16S, function(x) x / sum(x))

# Background File Exports
writeXStringSet(refseq(ps_clean_16S), file.path(path, "Bacteria_16S_rep.fasta"), format="fasta")
if (!is.null(phy_tree(ps_clean_16S, errorIfNULL = FALSE))) { 
  write.tree(phy_tree(ps_clean_16S), file.path(path, "Bacteria_16S_phylo.tre")) 
}

krona_16S <- psmelt(ps_clean_16S) %>% 
  filter(Abundance > 0) %>% 
  select(Abundance, Kingdom, Phylum, Class, Order, Family, Genus, Species) %>% 
  mutate(across(everything(), ~tidyr::replace_na(as.character(.), "Unclassified")))
write.table(krona_16S, file.path(path, "Bacteria_16S_Krona_Input.txt"), sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE)

5.2 16S Alpha Diversity

alpha_df_16S <- estimate_richness(ps_clean_16S, measures=c("Shannon", "Simpson", "Chao1", "Observed"))
write_xlsx(cbind(SampleID = rownames(alpha_df_16S), alpha_df_16S), file.path(path, "Bacteria_16S_Alpha_Diversity.xlsx"))

htmltools::tagList(datatable(alpha_df_16S, options = list(pageLength = 5, scrollX = TRUE), caption = "16S Alpha Diversity Indices"))
plot_richness(ps_clean_16S, x="TimePoint", color="Treatment", measures=c("Shannon", "Simpson")) +
  geom_boxplot(outlier.shape = NA, alpha=0.7) + 
  geom_jitter(width=0.1, alpha=0.6) + 
  theme_classic() + 
  labs(title="Bacteria 16S: Alpha Diversity Significance") +
  stat_compare_means(aes(group = Treatment), label = "p.signif", method = "wilcox.test")

5.3 16S Beta Diversity & PERMANOVA

dist_bray_16S <- phyloseq::distance(ps_clean_16S, method="bray")
write.csv(as.matrix(dist_bray_16S), file.path(path, "Bacteria_16S_BrayCurtis.csv"))

# PERMANOVA
sample_df_16S <- as(sample_data(ps_clean_16S), "data.frame")
permanova_16S <- adonis2(dist_bray_16S ~ Treatment * TimePoint, data = sample_df_16S, permutations = 999)
htmltools::tagList(datatable(as.data.frame(permanova_16S), options = list(dom = 't'), caption = "16S PERMANOVA Results"))
# PCoA Plot
plot_ordination(ps_rel_16S, ordinate(ps_rel_16S, method="PCoA", distance="bray"), color="TimePoint", shape="Treatment") +
  geom_point(size=4) + theme_minimal() + labs(title="Bacteria 16S: PCoA (Bray-Curtis)")

# UPGMA Tree
png(file.path(path, "Bacteria_16S_UPGMA.png"), width=800, height=600, res=100)
plot(hclust(dist_bray_16S, method="average"), main="Bacteria 16S: UPGMA Clustering", xlab="Samples", sub="")
dev.off()
## png 
##   2
plot(hclust(dist_bray_16S, method="average"), main="Bacteria 16S: UPGMA Clustering", xlab="Samples", sub="")

5.4 16S Taxonomic Composition

top40_16S <- names(sort(taxa_sums(ps_clean_16S), decreasing=TRUE))[1:40]

# Barplot (Top 20)
plot_bar(prune_taxa(top40_16S[1:20], ps_clean_16S), x="SampleID", fill="Genus") + 
  facet_wrap(~Treatment, scales="free_x") + theme_bw() + labs(title="Bacteria 16S: Top 20 Genera")

# Heatmap (Top 40)
plot_heatmap(prune_taxa(top40_16S, ps_clean_16S), method="NMDS", distance="bray", 
             sample.order="Treatment", taxa.label="Genus", taxa.order="Phylum",
             low="#000033", high="#FF3300", na.value="white") + labs(title="Bacteria 16S: Taxonomic Heatmap")

5.5 16S Core Microbiome & DESeq2

# Core Microbiome
ps_ctrl_16S <- subset_samples(ps_clean_16S, Treatment == "Controlled")
core_taxa_16S <- core_members(ps_ctrl_16S, detection = 0.001, prevalence = 0.8)

if(length(core_taxa_16S) > 0) {
  cat("\n**16S Core Taxa Detected:**\n")
  htmltools::tagList(datatable(as.data.frame(tax_table(ps_clean_16S)[core_taxa_16S, ]), options = list(pageLength = 5, scrollX = TRUE)))
}
## 
## **16S Core Taxa Detected:**
# DESeq2
ds2_16S <- DESeq(phyloseq_to_deseq2(ps_clean_16S, ~ TimePoint + Treatment), test="Wald", fitType="parametric", sfType="poscounts")
res_16S <- results(ds2_16S, contrast=c("Treatment", "Controlled", "Spontaneous"))
sigtab_16S <- res_16S[which(res_16S$padj < 0.05), ]

# DESeq2 Volcano Plot Visualization (With Taxonomy Annotations)
tax_df_16S <- as.data.frame(tax_table(ps_clean_16S))
volcano_data_16S <- cbind(as.data.frame(res_16S), tax_df_16S[rownames(res_16S), ])

volcano_data_16S <- volcano_data_16S[!is.na(volcano_data_16S$padj), ]

volcano_data_16S$Significance <- "Not Significant"
volcano_data_16S$Significance[volcano_data_16S$log2FoldChange > 1 & volcano_data_16S$padj < 0.05] <- "Significantly Increased (Controlled)"
volcano_data_16S$Significance[volcano_data_16S$log2FoldChange < -1 & volcano_data_16S$padj < 0.05] <- "Significantly Decreased (Spontaneous)"

volcano_data_16S$Label <- NA 
sig_rows_16S <- volcano_data_16S$Significance != "Not Significant"
volcano_data_16S$Label[sig_rows_16S] <- volcano_data_16S$Genus[sig_rows_16S] 

ggplot(volcano_data_16S, aes(x = log2FoldChange, y = -log10(padj), color = Significance)) +
  geom_point(alpha = 0.7, size = 2.5) +
  scale_color_manual(values = c("Not Significant" = "grey80", 
                                "Significantly Increased (Controlled)" = "#2CA02C", 
                                "Significantly Decreased (Spontaneous)" = "#D62728")) +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
  geom_text_repel(aes(label = Label), size = 3.5, max.overlaps = 20, 
                  box.padding = 0.5, point.padding = 0.3, color = "black", show.legend = FALSE) +
  theme_bw() +
  labs(title = "DESeq2 Volcano Plot: Controlled vs. Spontaneous Fermentation",
       x = "Log2 Fold Change (Biological Magnitude)",
       y = "-Log10 Adjusted p-value (Statistical Confidence)") +
  theme(legend.position = "bottom")

if(nrow(sigtab_16S) > 0) {
  sig_df_16S <- cbind(as(sigtab_16S, "data.frame"), as(tax_table(ps_clean_16S)[rownames(sigtab_16S), ], "matrix"))
  write_xlsx(sig_df_16S, file.path(path, "Bacteria_16S_DESeq2.xlsx"))
  cat("\n**16S Significant DESeq2 Taxa:**\n")
  htmltools::tagList(datatable(sig_df_16S, options = list(pageLength = 5, scrollX = TRUE)))
}
## 
## **16S Significant DESeq2 Taxa:**

5.6 16S Indicator Species Analysis (IndVal)

otu_mat_16S <- as(otu_table(ps_clean_16S), "matrix")
if(taxa_are_rows(ps_clean_16S)){ otu_mat_16S <- t(otu_mat_16S) }

ind_val_16S <- multipatt(otu_mat_16S, sample_data(ps_clean_16S)$Treatment, func = "IndVal.g", control = how(nperm=999))
ind_sum_16S <- ind_val_16S$sign %>% filter(p.value <= 0.05)

if(nrow(ind_sum_16S) > 0) {
  ind_final_16S <- cbind(as.data.frame(tax_table(ps_clean_16S)[rownames(ind_sum_16S), ]), ind_sum_16S)
  write_xlsx(ind_final_16S, file.path(path, "Bacteria_16S_Indicator.xlsx"))
  htmltools::tagList(datatable(ind_final_16S, options = list(pageLength = 5, scrollX = TRUE), caption = "16S Indicator Species"))
} else {
  cat("\n*No significant indicator species found for 16S.*\n")
}

5.7 16S Beta Diversity Partitioning (Turnover vs. Nestedness)

otu_mat_part <- as(otu_table(ps_clean_16S), "matrix")
if(taxa_are_rows(ps_clean_16S)){ otu_mat_part <- t(otu_mat_part) }

beta_part <- beta.pair.abund(otu_mat_part)

mean_turnover <- mean(as.matrix(beta_part$beta.bray.bal)[lower.tri(as.matrix(beta_part$beta.bray.bal))])
mean_nestedness <- mean(as.matrix(beta_part$beta.bray.gra)[lower.tri(as.matrix(beta_part$beta.bray.gra))])
mean_total <- mean_turnover + mean_nestedness

part_df <- data.frame(
  Component = c("Turnover (Species Replacement)", "Nestedness (Species Loss/Gain)"),
  Percentage = c((mean_turnover/mean_total)*100, (mean_nestedness/mean_total)*100)
)

ggplot(part_df, aes(x = "", y = Percentage, fill = Component)) +
  geom_bar(stat = "identity", width = 0.5, color="black") +
  coord_flip() + theme_minimal() + 
  scale_fill_manual(values=c("#E69F00", "#56B4E9")) +
  labs(title = "Bacteria 16S: Drivers of Community Dissimilarity", x = "", y = "% Contribution to Beta Diversity")

5.8 16S Target Genera Ratio Dynamics (LAB vs. Spoilage)

melt_16S <- psmelt(ps_rel_16S)

lab_genera <- c("Lactobacillus", "Leuconostoc", "Lactiplantibacillus", "Weissella", "Pediococcus")
spoilage_genera <- c("Acetobacter", "Gluconobacter", "Enterobacter", "Klebsiella", "Pantoea")

ratio_df <- melt_16S %>%
  group_by(SampleID, Treatment, TimePoint) %>%
  dplyr::summarize(
    LAB_Abundance = sum(Abundance[Genus %in% lab_genera], na.rm = TRUE),
    Spoilage_Abundance = sum(Abundance[Genus %in% spoilage_genera], na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(LAB_to_Spoilage_Ratio = (LAB_Abundance + 0.0001) / (Spoilage_Abundance + 0.0001))

ggplot(ratio_df, aes(x = TimePoint, y = log10(LAB_to_Spoilage_Ratio), fill = Treatment)) +
  geom_boxplot(alpha = 0.8, color="black", outlier.shape = NA) +
  geom_jitter(position = position_dodge(0.75), size = 2, alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  theme_bw() +
  scale_fill_manual(values=c("#2CA02C", "#D62728")) +
  labs(title = "16S Shift: LAB vs Spoilage/AAB Dynamics", 
       y = "Log10 (LAB / Spoilage Ratio)", 
       subtitle = "Values above 0 (red line) indicate Lactic Acid Bacteria dominance")


6. Fungal Community Analysis (ITS)

6.1 ITS Setup & Background Exports

ps_clean_ITS <- prune_samples(sample_sums(ps_ITS) > 1000, ps_ITS)
ps_rel_ITS <- transform_sample_counts(ps_clean_ITS, function(x) x / sum(x))

writeXStringSet(refseq(ps_clean_ITS), file.path(path, "Fungi_ITS_rep.fasta"), format="fasta")

krona_ITS <- psmelt(ps_clean_ITS) %>% 
  filter(Abundance > 0) %>% 
  select(Abundance, Kingdom, Phylum, Class, Order, Family, Genus, Species) %>% 
  mutate(across(everything(), ~tidyr::replace_na(as.character(.), "Unclassified")))
write.table(krona_ITS, file.path(path, "Fungi_ITS_Krona_Input.txt"), sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE)

6.2 ITS Alpha Diversity

alpha_df_ITS <- estimate_richness(ps_clean_ITS, measures=c("Shannon", "Simpson", "Chao1", "Observed"))
write_xlsx(cbind(SampleID = rownames(alpha_df_ITS), alpha_df_ITS), file.path(path, "Fungi_ITS_Alpha_Diversity.xlsx"))

htmltools::tagList(datatable(alpha_df_ITS, options = list(pageLength = 5, scrollX = TRUE), caption = "ITS Alpha Diversity Indices"))
plot_richness(ps_clean_ITS, x="TimePoint", color="Treatment", measures=c("Shannon", "Simpson")) +
  geom_boxplot(outlier.shape = NA, alpha=0.7) + 
  geom_jitter(width=0.1, alpha=0.6) + 
  theme_classic() + 
  labs(title="Fungi ITS: Alpha Diversity Significance") +
  stat_compare_means(aes(group = Treatment), label = "p.signif", method = "wilcox.test")

6.3 ITS Beta Diversity & PERMANOVA

dist_bray_ITS <- phyloseq::distance(ps_clean_ITS, method="bray")
write.csv(as.matrix(dist_bray_ITS), file.path(path, "Fungi_ITS_BrayCurtis.csv"))

# PERMANOVA
sample_df_ITS <- as(sample_data(ps_clean_ITS), "data.frame")
permanova_ITS <- adonis2(dist_bray_ITS ~ Treatment * TimePoint, data = sample_df_ITS, permutations = 999)
htmltools::tagList(datatable(as.data.frame(permanova_ITS), options = list(dom = 't'), caption = "ITS PERMANOVA Results"))
# PCoA Plot
plot_ordination(ps_rel_ITS, ordinate(ps_rel_ITS, method="PCoA", distance="bray"), color="TimePoint", shape="Treatment") +
  geom_point(size=4) + theme_minimal() + labs(title="Fungi ITS: PCoA (Bray-Curtis)")

# UPGMA Tree
png(file.path(path, "Fungi_ITS_UPGMA.png"), width=800, height=600, res=100)
plot(hclust(dist_bray_ITS, method="average"), main="Fungi ITS: UPGMA Clustering", xlab="Samples", sub="")
dev.off()
## png 
##   2
plot(hclust(dist_bray_ITS, method="average"), main="Fungi ITS: UPGMA Clustering", xlab="Samples", sub="")

6.4 ITS Taxonomic Composition

top40_ITS <- names(sort(taxa_sums(ps_clean_ITS), decreasing=TRUE))[1:40]

# Barplot (Top 20)
plot_bar(prune_taxa(top40_ITS[1:20], ps_clean_ITS), x="SampleID", fill="Genus") + 
  facet_wrap(~Treatment, scales="free_x") + theme_bw() + labs(title="Fungi ITS: Top 20 Genera")

# Heatmap (Top 40)
plot_heatmap(prune_taxa(top40_ITS, ps_clean_ITS), method="NMDS", distance="bray", 
             sample.order="Treatment", taxa.label="Genus", taxa.order="Phylum",
             low="#000033", high="#FF3300", na.value="white") + labs(title="Fungi ITS: Taxonomic Heatmap")

6.5 ITS Core Microbiome & DESeq2

# Core Microbiome
ps_ctrl_ITS <- subset_samples(ps_clean_ITS, Treatment == "Controlled")
core_taxa_ITS <- core_members(ps_ctrl_ITS, detection = 0.001, prevalence = 0.8)

if(length(core_taxa_ITS) > 0) {
  cat("\n**ITS Core Taxa Detected:**\n")
  htmltools::tagList(datatable(as.data.frame(tax_table(ps_clean_ITS)[core_taxa_ITS, ]), options = list(pageLength = 5, scrollX = TRUE)))
}
## 
## **ITS Core Taxa Detected:**
# DESeq2
ds2_ITS <- DESeq(phyloseq_to_deseq2(ps_clean_ITS, ~ TimePoint + Treatment), test="Wald", fitType="parametric", sfType="poscounts")
res_ITS <- results(ds2_ITS, contrast=c("Treatment", "Controlled", "Spontaneous"))
sigtab_ITS <- res_ITS[which(res_ITS$padj < 0.05), ]

if(nrow(sigtab_ITS) > 0) {
  sig_df_ITS <- cbind(as(sigtab_ITS, "data.frame"), as(tax_table(ps_clean_ITS)[rownames(sigtab_ITS), ], "matrix"))
  write_xlsx(sig_df_ITS, file.path(path, "Fungi_ITS_DESeq2.xlsx"))
  cat("\n**ITS Significant DESeq2 Taxa:**\n")
  htmltools::tagList(datatable(sig_df_ITS, options = list(pageLength = 5, scrollX = TRUE)))
}
## 
## **ITS Significant DESeq2 Taxa:**
# Annotated Volcano Plot
tax_df_ITS <- as.data.frame(tax_table(ps_clean_ITS))
volcano_data_ITS <- cbind(as.data.frame(res_ITS), tax_df_ITS[rownames(res_ITS), ])

volcano_data_ITS <- volcano_data_ITS[!is.na(volcano_data_ITS$padj), ]

volcano_data_ITS$Significance <- "Not Significant"
volcano_data_ITS$Significance[volcano_data_ITS$log2FoldChange > 1 & volcano_data_ITS$padj < 0.05] <- "Significantly Increased (Controlled)"
volcano_data_ITS$Significance[volcano_data_ITS$log2FoldChange < -1 & volcano_data_ITS$padj < 0.05] <- "Significantly Decreased (Spontaneous)"

volcano_data_ITS$Label <- NA 
sig_rows_ITS <- volcano_data_ITS$Significance != "Not Significant"
volcano_data_ITS$Label[sig_rows_ITS] <- volcano_data_ITS$Genus[sig_rows_ITS] 
volcano_data_ITS$Label[sig_rows_ITS] <- paste0(rownames(volcano_data_ITS)[sig_rows_ITS], " (", volcano_data_ITS$Genus[sig_rows_ITS], ")")

ggplot(volcano_data_ITS, aes(x = log2FoldChange, y = -log10(padj), color = Significance)) +
  geom_point(alpha = 0.7, size = 2.5) +
  scale_color_manual(values = c("Not Significant" = "grey80", 
                                "Significantly Increased (Controlled)" = "#2CA02C", 
                                "Significantly Decreased (Spontaneous)" = "#D62728")) +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
  geom_text_repel(aes(label = Label), size = 3.5, max.overlaps = 20, 
                  box.padding = 0.5, point.padding = 0.3, color = "black", show.legend = FALSE) +
  theme_bw() +
  labs(title = "ITS DESeq2 Volcano Plot: Fungal Shifts in Fermentation",
       x = "Log2 Fold Change (Biological Magnitude)",
       y = "-Log10 Adjusted p-value (Statistical Confidence)") +
  theme(legend.position = "bottom")

6.6 ITS Indicator Species Analysis (IndVal)

otu_mat_ITS <- as(otu_table(ps_clean_ITS), "matrix")
if(taxa_are_rows(ps_clean_ITS)){ otu_mat_ITS <- t(otu_mat_ITS) }

ind_val_ITS <- multipatt(otu_mat_ITS, sample_data(ps_clean_ITS)$Treatment, func = "IndVal.g", control = how(nperm=999))
ind_sum_ITS <- ind_val_ITS$sign %>% filter(p.value <= 0.05)

if(nrow(ind_sum_ITS) > 0) {
  ind_final_ITS <- cbind(as.data.frame(tax_table(ps_clean_ITS)[rownames(ind_sum_ITS), ]), ind_sum_ITS)
  write_xlsx(ind_final_ITS, file.path(path, "Fungi_ITS_Indicator.xlsx"))
  htmltools::tagList(datatable(ind_final_ITS, options = list(pageLength = 5, scrollX = TRUE), caption = "ITS Indicator Species"))
} else {
  cat("\n*No significant indicator species found for ITS.*\n")
}

6.7 ITS Beta Diversity Partitioning (Turnover vs. Nestedness)

otu_mat_part <- as(otu_table(ps_clean_ITS), "matrix")
if(taxa_are_rows(ps_clean_ITS)){ otu_mat_part <- t(otu_mat_part) }

beta_part <- beta.pair.abund(otu_mat_part)

mean_turnover <- mean(as.matrix(beta_part$beta.bray.bal)[lower.tri(as.matrix(beta_part$beta.bray.bal))])
mean_nestedness <- mean(as.matrix(beta_part$beta.bray.gra)[lower.tri(as.matrix(beta_part$beta.bray.gra))])
mean_total <- mean_turnover + mean_nestedness

part_df <- data.frame(
  Component = c("Turnover (Replacement)", "Nestedness (Loss/Gain)"),
  Percentage = c((mean_turnover/mean_total)*100, (mean_nestedness/mean_total)*100)
)

ggplot(part_df, aes(x = "", y = Percentage, fill = Component)) +
  geom_bar(stat = "identity", width = 0.5, color="black") +
  coord_flip() + theme_minimal() + 
  scale_fill_manual(values=c("#E69F00", "#56B4E9")) +
  labs(title = "Fungi ITS: Drivers of Community Dissimilarity", x = "", y = "% Contribution to Beta Diversity")

6.8 ITS Co-occurrence Network Analysis

top30_taxa_ITS <- names(sort(taxa_sums(ps_clean_ITS), decreasing=TRUE))[1:30]
ps_net_ITS <- prune_taxa(top30_taxa_ITS, ps_clean_ITS)

net_otu <- as(otu_table(ps_net_ITS), "matrix")
if(taxa_are_rows(ps_net_ITS)){ net_otu <- t(net_otu) }
colnames(net_otu) <- as.data.frame(tax_table(ps_net_ITS))$Genus

cor_res <- rcorr(as.matrix(net_otu), type="spearman")
R_mat <- cor_res$r
P_mat <- cor_res$P

edges <- melt(R_mat, varnames = c("Taxon_A", "Taxon_B"), value.name = "Correlation")
p_edges <- melt(P_mat, varnames = c("Taxon_A", "Taxon_B"), value.name = "p_value")
edges$p_value <- p_edges$p_value

edges_sig <- edges %>%
  filter(Taxon_A != Taxon_B) %>%
  filter(p_value < 0.05) %>%
  filter(abs(Correlation) > 0.6)

if(nrow(edges_sig) > 0) {
  net_graph <- graph_from_data_frame(edges_sig, directed = FALSE)
  net_graph <- simplify(net_graph, edge.attr.comb = "first") 
  
  E(net_graph)$color <- ifelse(E(net_graph)$Correlation > 0, "#0072B2", "#D55E00")
  E(net_graph)$weight <- abs(E(net_graph)$Correlation) * 8 
  
  set.seed(42) 
  plot(net_graph, 
       layout = layout_with_fr(net_graph), 
       vertex.size = 12, 
       vertex.label.cex = 0.8, 
       vertex.label.color = "black",
       vertex.color = "lightblue",
       edge.width = E(net_graph)$weight,
       main = "Fungi ITS: Co-occurrence Network (Top 30 Genera)",
       sub = "Blue = Positive Correlation | Red = Negative Correlation")
} else {
  cat("\n*No significant strong correlations found among the top taxa.*\n")
}