library(dada2)
library(phyloseq)
library(DECIPHER)
library(phangorn)
library(biomformat)
library(ggplot2)
library(vegan)
library(ape)
library(writexl)
library(DESeq2)
library(dplyr)
library(DT)
library(microbiome)
library(Biostrings)
library(ggpubr)
library(indicspecies)
library(betapart)
library(igraph)
library(Hmisc)
library(reshape2)
library(ggrepel)
# ---- CORE CONFIGURATION ----
path <- "C:/Users/Owner/Documents/BSP_2022/BSP_2025/Metabarcoding_training"
cutadapt_path <- "C:/Users/Owner/Downloads/cutadapt" # Verify this matches your system installation
system2(cutadapt_path, args = "--version") # Run shell commands from R
# Standard Universal Primers (To be tested via Cutadapt)
fwd_16S <- "CCTACGGGNGGCWGCAG" # V3 Forward
rev_16S <- "GACTACHVGGGTATCTAATCC" # V4 Reverse
fwd_ITS <- "GCATCGATGAAGAACGCAGC" # ITS3 Forward
rev_ITS <- "TCCTCCGCTTATTGATATGC" # ITS4 Reversecat("\n\n======================================================\n")
cat("STARTING 16S PIPELINE - WATCH CUTADAPT RETENTION RATES\n")
cat("======================================================\n\n")
fnFs_16S <- sort(list.files(path, pattern = "__1_1\\.fastq\\.gz$", full.names = TRUE))
fnRs_16S <- sort(list.files(path, pattern = "__1_2\\.fastq\\.gz$", full.names = TRUE))
sample.names_16S <- sapply(strsplit(basename(fnFs_16S), "__1"), `[`, 1)
path_cut_16S <- file.path(path, "cutadapt_16S")
dir.create(path_cut_16S, showWarnings = FALSE)
fnFs_cut_16S <- file.path(path_cut_16S, paste0(sample.names_16S, "_F_cut.fastq.gz"))
fnRs_cut_16S <- file.path(path_cut_16S, paste0(sample.names_16S, "_R_cut.fastq.gz"))
# Cutadapt Trimming
for(i in seq_along(fnFs_16S)) {
system2(cutadapt_path, args = c("-g", fwd_16S, "-G", rev_16S,
"-o", shQuote(fnFs_cut_16S[i]), "-p", shQuote(fnRs_cut_16S[i]),
shQuote(fnFs_16S[i]), shQuote(fnRs_16S[i]),
"--discard-untrimmed", "--minimum-length=100"))
}
# DADA2 Filtering
filtFs_16S <- file.path(path, "filtered_16S", paste0(sample.names_16S, "_F_filt.fastq.gz"))
filtRs_16S <- file.path(path, "filtered_16S", paste0(sample.names_16S, "_R_filt.fastq.gz"))
names(filtFs_16S) <- sample.names_16S; names(filtRs_16S) <- sample.names_16S
out_16S <- filterAndTrim(fnFs_cut_16S, filtFs_16S, fnRs_cut_16S, filtRs_16S,
truncLen=c(250, 200), maxN=0, maxEE=c(2,2), truncQ=2,
rm.phix=TRUE, compress=TRUE, multithread=TRUE)# 16S Quality Control Plots
print(plotQualityProfile(fnFs_16S[1:2]) + ggtitle("RAW Forward Reads (16S)"))# Error Learning & Core Algorithm
errF_16S <- learnErrors(filtFs_16S, multithread=TRUE)
errR_16S <- learnErrors(filtRs_16S, multithread=TRUE)
mergers_16S <- mergePairs(dada(filtFs_16S, err=errF_16S, multithread=TRUE), filtFs_16S,
dada(filtRs_16S, err=errR_16S, multithread=TRUE), filtRs_16S, verbose=TRUE)
seqtab_nochim_16S <- removeBimeraDenovo(rowsum(makeSequenceTable(mergers_16S), group = sample.names_16S), method="consensus", multithread=TRUE)
# Taxonomy (SILVA)
seqtab_final_16S <- seqtab_nochim_16S[, nchar(getSequences(seqtab_nochim_16S)) >= 350]
taxa_16S <- assignTaxonomy(seqtab_final_16S, file.path(path, "silva_nr99_v138.2_toGenus_trainset.fa.gz"), multithread=TRUE)
taxa_16S <- addSpecies(taxa_16S, file.path(path, "silva_v138.2_assignSpecies.fa.gz"))cat("\n\n======================================================\n")
cat("STARTING ITS PIPELINE - WATCH CUTADAPT RETENTION RATES\n")
cat("======================================================\n\n")
fnFs_ITS <- sort(list.files(path, pattern = "__2_1\\.fastq\\.gz$", full.names = TRUE))
fnRs_ITS <- sort(list.files(path, pattern = "__2_2\\.fastq\\.gz$", full.names = TRUE))
sample.names_ITS <- sapply(strsplit(basename(fnFs_ITS), "__2"), `[`, 1)
path_cut_ITS <- file.path(path, "cutadapt_ITS")
dir.create(path_cut_ITS, showWarnings = FALSE)
fnFs_cut_ITS <- file.path(path_cut_ITS, paste0(sample.names_ITS, "_F_cut.fastq.gz"))
fnRs_cut_ITS <- file.path(path_cut_ITS, paste0(sample.names_ITS, "_R_cut.fastq.gz"))
# Cutadapt Trimming
for(i in seq_along(fnFs_ITS)) {
system2(cutadapt_path, args = c("-g", fwd_ITS, "-G", rev_ITS,
"-o", shQuote(fnFs_cut_ITS[i]), "-p", shQuote(fnRs_cut_ITS[i]),
shQuote(fnFs_ITS[i]), shQuote(fnRs_ITS[i]),
"--discard-untrimmed", "--minimum-length=50"))
}
# DADA2 Filtering
filtFs_ITS <- file.path(path, "filtered_ITS", paste0(sample.names_ITS, "_F_filt.fastq.gz"))
filtRs_ITS <- file.path(path, "filtered_ITS", paste0(sample.names_ITS, "_R_filt.fastq.gz"))
names(filtFs_ITS) <- sample.names_ITS; names(filtRs_ITS) <- sample.names_ITS
out_ITS <- filterAndTrim(fnFs_cut_ITS, filtFs_ITS, fnRs_cut_ITS, filtRs_ITS,
maxN=0, maxEE=c(2,2), truncQ=2, minLen=50,
rm.phix=TRUE, compress=TRUE, multithread=TRUE)# ITS Quality Control Plots
print(plotQualityProfile(fnFs_ITS[1:2]) + ggtitle("RAW Forward Reads (ITS)"))# Error Learning & Core Algorithm
errF_ITS <- learnErrors(filtFs_ITS, multithread=TRUE)
errR_ITS <- learnErrors(filtRs_ITS, multithread=TRUE)
mergers_ITS <- mergePairs(dada(filtFs_ITS, err=errF_ITS, multithread=TRUE), filtFs_ITS,
dada(filtRs_ITS, err=errR_ITS, multithread=TRUE), filtRs_ITS, verbose=TRUE)
seqtab_final_ITS <- removeBimeraDenovo(rowsum(makeSequenceTable(mergers_ITS), group = sample.names_ITS), method="consensus", multithread=TRUE)
# Taxonomy (UNITE - Single Threaded for Windows Stability)
taxa_ITS <- assignTaxonomy(seqtab_final_ITS, file.path(path, "sh_general_release_dynamic_19.02.2025.fasta"), multithread=FALSE)##
## --- Building Phylogenetic Trees & Phyloseq Objects ---
metadata <- data.frame(
SampleID = rownames(seqtab_final_16S),
TimePoint = factor(ifelse(grepl("^0", rownames(seqtab_final_16S)), "Start", "Finish"), levels=c("Start", "Finish")),
Treatment = factor(ifelse(grepl("CET", rownames(seqtab_final_16S)), "Controlled", "Spontaneous"))
)
rownames(metadata) <- metadata$SampleID
# 16S Assembly (Includes ML Tree)
generate_ml_tree <- function(seqtab_matrix) {
seqs <- getSequences(seqtab_matrix); names(seqs) <- seqs
phang_align <- phyDat(as(AlignSeqs(DNAStringSet(seqs), anchor=NA, verbose=FALSE), "matrix"), type="DNA")
return(optim.pml(pml(NJ(dist.ml(phang_align)), data=phang_align), model="GTR", optInv=TRUE, optGamma=TRUE, rearrangement="stochastic", control=pml.control(trace=0))$tree)
}
ps_16S <- phyloseq(otu_table(seqtab_final_16S, taxa_are_rows=FALSE), sample_data(metadata),
tax_table(taxa_16S), phy_tree(generate_ml_tree(seqtab_final_16S)))
dna_16S <- Biostrings::DNAStringSet(taxa_names(ps_16S)); names(dna_16S) <- taxa_names(ps_16S)
ps_16S <- merge_phyloseq(ps_16S, dna_16S)
taxa_names(ps_16S) <- paste0("ASV_16S_", seq(ntaxa(ps_16S)))
# Filter Host Contaminants
ps_16S <- subset_taxa(ps_16S, (Order != "Chloroplast") | is.na(Order))
ps_16S <- subset_taxa(ps_16S, (Family != "Mitochondria") | is.na(Family))
# ITS Assembly (No Tree)
ps_ITS <- phyloseq(otu_table(seqtab_final_ITS, taxa_are_rows=FALSE), sample_data(metadata), tax_table(taxa_ITS))
dna_ITS <- Biostrings::DNAStringSet(taxa_names(ps_ITS)); names(dna_ITS) <- taxa_names(ps_ITS)
ps_ITS <- merge_phyloseq(ps_ITS, dna_ITS)
taxa_names(ps_ITS) <- paste0("ASV_ITS_", seq(ntaxa(ps_ITS)))
# Overwrite the broken .rds files with the fixed, sequence-containing objects
saveRDS(ps_16S, file = file.path(path, "Saved_R_Data", "ps_16S_final.rds"))
saveRDS(ps_ITS, file = file.path(path, "Saved_R_Data", "ps_ITS_final.rds"))
cat("\nPhyloseq objects successfully rebuilt with embedded DNA sequences.\n")##
## Phyloseq objects successfully rebuilt with embedded DNA sequences.
# Clean and Transform
ps_clean_16S <- prune_samples(sample_sums(ps_16S) > 1000, ps_16S)
ps_rel_16S <- transform_sample_counts(ps_clean_16S, function(x) x / sum(x))
# Background File Exports
writeXStringSet(refseq(ps_clean_16S), file.path(path, "Bacteria_16S_rep.fasta"), format="fasta")
if (!is.null(phy_tree(ps_clean_16S, errorIfNULL = FALSE))) {
write.tree(phy_tree(ps_clean_16S), file.path(path, "Bacteria_16S_phylo.tre"))
}
krona_16S <- psmelt(ps_clean_16S) %>%
filter(Abundance > 0) %>%
select(Abundance, Kingdom, Phylum, Class, Order, Family, Genus, Species) %>%
mutate(across(everything(), ~tidyr::replace_na(as.character(.), "Unclassified")))
write.table(krona_16S, file.path(path, "Bacteria_16S_Krona_Input.txt"), sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE)alpha_df_16S <- estimate_richness(ps_clean_16S, measures=c("Shannon", "Simpson", "Chao1", "Observed"))
write_xlsx(cbind(SampleID = rownames(alpha_df_16S), alpha_df_16S), file.path(path, "Bacteria_16S_Alpha_Diversity.xlsx"))
htmltools::tagList(datatable(alpha_df_16S, options = list(pageLength = 5, scrollX = TRUE), caption = "16S Alpha Diversity Indices"))plot_richness(ps_clean_16S, x="TimePoint", color="Treatment", measures=c("Shannon", "Simpson")) +
geom_boxplot(outlier.shape = NA, alpha=0.7) +
geom_jitter(width=0.1, alpha=0.6) +
theme_classic() +
labs(title="Bacteria 16S: Alpha Diversity Significance") +
stat_compare_means(aes(group = Treatment), label = "p.signif", method = "wilcox.test")dist_bray_16S <- phyloseq::distance(ps_clean_16S, method="bray")
write.csv(as.matrix(dist_bray_16S), file.path(path, "Bacteria_16S_BrayCurtis.csv"))
# PERMANOVA
sample_df_16S <- as(sample_data(ps_clean_16S), "data.frame")
permanova_16S <- adonis2(dist_bray_16S ~ Treatment * TimePoint, data = sample_df_16S, permutations = 999)
htmltools::tagList(datatable(as.data.frame(permanova_16S), options = list(dom = 't'), caption = "16S PERMANOVA Results"))# PCoA Plot
plot_ordination(ps_rel_16S, ordinate(ps_rel_16S, method="PCoA", distance="bray"), color="TimePoint", shape="Treatment") +
geom_point(size=4) + theme_minimal() + labs(title="Bacteria 16S: PCoA (Bray-Curtis)")# UPGMA Tree
png(file.path(path, "Bacteria_16S_UPGMA.png"), width=800, height=600, res=100)
plot(hclust(dist_bray_16S, method="average"), main="Bacteria 16S: UPGMA Clustering", xlab="Samples", sub="")
dev.off()## png
## 2
plot(hclust(dist_bray_16S, method="average"), main="Bacteria 16S: UPGMA Clustering", xlab="Samples", sub="")top40_16S <- names(sort(taxa_sums(ps_clean_16S), decreasing=TRUE))[1:40]
# Barplot (Top 20)
plot_bar(prune_taxa(top40_16S[1:20], ps_clean_16S), x="SampleID", fill="Genus") +
facet_wrap(~Treatment, scales="free_x") + theme_bw() + labs(title="Bacteria 16S: Top 20 Genera")# Heatmap (Top 40)
plot_heatmap(prune_taxa(top40_16S, ps_clean_16S), method="NMDS", distance="bray",
sample.order="Treatment", taxa.label="Genus", taxa.order="Phylum",
low="#000033", high="#FF3300", na.value="white") + labs(title="Bacteria 16S: Taxonomic Heatmap")# Core Microbiome
ps_ctrl_16S <- subset_samples(ps_clean_16S, Treatment == "Controlled")
core_taxa_16S <- core_members(ps_ctrl_16S, detection = 0.001, prevalence = 0.8)
if(length(core_taxa_16S) > 0) {
cat("\n**16S Core Taxa Detected:**\n")
htmltools::tagList(datatable(as.data.frame(tax_table(ps_clean_16S)[core_taxa_16S, ]), options = list(pageLength = 5, scrollX = TRUE)))
}##
## **16S Core Taxa Detected:**
# DESeq2
ds2_16S <- DESeq(phyloseq_to_deseq2(ps_clean_16S, ~ TimePoint + Treatment), test="Wald", fitType="parametric", sfType="poscounts")
res_16S <- results(ds2_16S, contrast=c("Treatment", "Controlled", "Spontaneous"))
sigtab_16S <- res_16S[which(res_16S$padj < 0.05), ]
# DESeq2 Volcano Plot Visualization (With Taxonomy Annotations)
tax_df_16S <- as.data.frame(tax_table(ps_clean_16S))
volcano_data_16S <- cbind(as.data.frame(res_16S), tax_df_16S[rownames(res_16S), ])
volcano_data_16S <- volcano_data_16S[!is.na(volcano_data_16S$padj), ]
volcano_data_16S$Significance <- "Not Significant"
volcano_data_16S$Significance[volcano_data_16S$log2FoldChange > 1 & volcano_data_16S$padj < 0.05] <- "Significantly Increased (Controlled)"
volcano_data_16S$Significance[volcano_data_16S$log2FoldChange < -1 & volcano_data_16S$padj < 0.05] <- "Significantly Decreased (Spontaneous)"
volcano_data_16S$Label <- NA
sig_rows_16S <- volcano_data_16S$Significance != "Not Significant"
volcano_data_16S$Label[sig_rows_16S] <- volcano_data_16S$Genus[sig_rows_16S]
ggplot(volcano_data_16S, aes(x = log2FoldChange, y = -log10(padj), color = Significance)) +
geom_point(alpha = 0.7, size = 2.5) +
scale_color_manual(values = c("Not Significant" = "grey80",
"Significantly Increased (Controlled)" = "#2CA02C",
"Significantly Decreased (Spontaneous)" = "#D62728")) +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
geom_text_repel(aes(label = Label), size = 3.5, max.overlaps = 20,
box.padding = 0.5, point.padding = 0.3, color = "black", show.legend = FALSE) +
theme_bw() +
labs(title = "DESeq2 Volcano Plot: Controlled vs. Spontaneous Fermentation",
x = "Log2 Fold Change (Biological Magnitude)",
y = "-Log10 Adjusted p-value (Statistical Confidence)") +
theme(legend.position = "bottom")if(nrow(sigtab_16S) > 0) {
sig_df_16S <- cbind(as(sigtab_16S, "data.frame"), as(tax_table(ps_clean_16S)[rownames(sigtab_16S), ], "matrix"))
write_xlsx(sig_df_16S, file.path(path, "Bacteria_16S_DESeq2.xlsx"))
cat("\n**16S Significant DESeq2 Taxa:**\n")
htmltools::tagList(datatable(sig_df_16S, options = list(pageLength = 5, scrollX = TRUE)))
}##
## **16S Significant DESeq2 Taxa:**
otu_mat_16S <- as(otu_table(ps_clean_16S), "matrix")
if(taxa_are_rows(ps_clean_16S)){ otu_mat_16S <- t(otu_mat_16S) }
ind_val_16S <- multipatt(otu_mat_16S, sample_data(ps_clean_16S)$Treatment, func = "IndVal.g", control = how(nperm=999))
ind_sum_16S <- ind_val_16S$sign %>% filter(p.value <= 0.05)
if(nrow(ind_sum_16S) > 0) {
ind_final_16S <- cbind(as.data.frame(tax_table(ps_clean_16S)[rownames(ind_sum_16S), ]), ind_sum_16S)
write_xlsx(ind_final_16S, file.path(path, "Bacteria_16S_Indicator.xlsx"))
htmltools::tagList(datatable(ind_final_16S, options = list(pageLength = 5, scrollX = TRUE), caption = "16S Indicator Species"))
} else {
cat("\n*No significant indicator species found for 16S.*\n")
}otu_mat_part <- as(otu_table(ps_clean_16S), "matrix")
if(taxa_are_rows(ps_clean_16S)){ otu_mat_part <- t(otu_mat_part) }
beta_part <- beta.pair.abund(otu_mat_part)
mean_turnover <- mean(as.matrix(beta_part$beta.bray.bal)[lower.tri(as.matrix(beta_part$beta.bray.bal))])
mean_nestedness <- mean(as.matrix(beta_part$beta.bray.gra)[lower.tri(as.matrix(beta_part$beta.bray.gra))])
mean_total <- mean_turnover + mean_nestedness
part_df <- data.frame(
Component = c("Turnover (Species Replacement)", "Nestedness (Species Loss/Gain)"),
Percentage = c((mean_turnover/mean_total)*100, (mean_nestedness/mean_total)*100)
)
ggplot(part_df, aes(x = "", y = Percentage, fill = Component)) +
geom_bar(stat = "identity", width = 0.5, color="black") +
coord_flip() + theme_minimal() +
scale_fill_manual(values=c("#E69F00", "#56B4E9")) +
labs(title = "Bacteria 16S: Drivers of Community Dissimilarity", x = "", y = "% Contribution to Beta Diversity")melt_16S <- psmelt(ps_rel_16S)
lab_genera <- c("Lactobacillus", "Leuconostoc", "Lactiplantibacillus", "Weissella", "Pediococcus")
spoilage_genera <- c("Acetobacter", "Gluconobacter", "Enterobacter", "Klebsiella", "Pantoea")
ratio_df <- melt_16S %>%
group_by(SampleID, Treatment, TimePoint) %>%
dplyr::summarize(
LAB_Abundance = sum(Abundance[Genus %in% lab_genera], na.rm = TRUE),
Spoilage_Abundance = sum(Abundance[Genus %in% spoilage_genera], na.rm = TRUE),
.groups = "drop"
) %>%
mutate(LAB_to_Spoilage_Ratio = (LAB_Abundance + 0.0001) / (Spoilage_Abundance + 0.0001))
ggplot(ratio_df, aes(x = TimePoint, y = log10(LAB_to_Spoilage_Ratio), fill = Treatment)) +
geom_boxplot(alpha = 0.8, color="black", outlier.shape = NA) +
geom_jitter(position = position_dodge(0.75), size = 2, alpha = 0.6) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
theme_bw() +
scale_fill_manual(values=c("#2CA02C", "#D62728")) +
labs(title = "16S Shift: LAB vs Spoilage/AAB Dynamics",
y = "Log10 (LAB / Spoilage Ratio)",
subtitle = "Values above 0 (red line) indicate Lactic Acid Bacteria dominance")ps_clean_ITS <- prune_samples(sample_sums(ps_ITS) > 1000, ps_ITS)
ps_rel_ITS <- transform_sample_counts(ps_clean_ITS, function(x) x / sum(x))
writeXStringSet(refseq(ps_clean_ITS), file.path(path, "Fungi_ITS_rep.fasta"), format="fasta")
krona_ITS <- psmelt(ps_clean_ITS) %>%
filter(Abundance > 0) %>%
select(Abundance, Kingdom, Phylum, Class, Order, Family, Genus, Species) %>%
mutate(across(everything(), ~tidyr::replace_na(as.character(.), "Unclassified")))
write.table(krona_ITS, file.path(path, "Fungi_ITS_Krona_Input.txt"), sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE)alpha_df_ITS <- estimate_richness(ps_clean_ITS, measures=c("Shannon", "Simpson", "Chao1", "Observed"))
write_xlsx(cbind(SampleID = rownames(alpha_df_ITS), alpha_df_ITS), file.path(path, "Fungi_ITS_Alpha_Diversity.xlsx"))
htmltools::tagList(datatable(alpha_df_ITS, options = list(pageLength = 5, scrollX = TRUE), caption = "ITS Alpha Diversity Indices"))plot_richness(ps_clean_ITS, x="TimePoint", color="Treatment", measures=c("Shannon", "Simpson")) +
geom_boxplot(outlier.shape = NA, alpha=0.7) +
geom_jitter(width=0.1, alpha=0.6) +
theme_classic() +
labs(title="Fungi ITS: Alpha Diversity Significance") +
stat_compare_means(aes(group = Treatment), label = "p.signif", method = "wilcox.test")dist_bray_ITS <- phyloseq::distance(ps_clean_ITS, method="bray")
write.csv(as.matrix(dist_bray_ITS), file.path(path, "Fungi_ITS_BrayCurtis.csv"))
# PERMANOVA
sample_df_ITS <- as(sample_data(ps_clean_ITS), "data.frame")
permanova_ITS <- adonis2(dist_bray_ITS ~ Treatment * TimePoint, data = sample_df_ITS, permutations = 999)
htmltools::tagList(datatable(as.data.frame(permanova_ITS), options = list(dom = 't'), caption = "ITS PERMANOVA Results"))# PCoA Plot
plot_ordination(ps_rel_ITS, ordinate(ps_rel_ITS, method="PCoA", distance="bray"), color="TimePoint", shape="Treatment") +
geom_point(size=4) + theme_minimal() + labs(title="Fungi ITS: PCoA (Bray-Curtis)")# UPGMA Tree
png(file.path(path, "Fungi_ITS_UPGMA.png"), width=800, height=600, res=100)
plot(hclust(dist_bray_ITS, method="average"), main="Fungi ITS: UPGMA Clustering", xlab="Samples", sub="")
dev.off()## png
## 2
plot(hclust(dist_bray_ITS, method="average"), main="Fungi ITS: UPGMA Clustering", xlab="Samples", sub="")top40_ITS <- names(sort(taxa_sums(ps_clean_ITS), decreasing=TRUE))[1:40]
# Barplot (Top 20)
plot_bar(prune_taxa(top40_ITS[1:20], ps_clean_ITS), x="SampleID", fill="Genus") +
facet_wrap(~Treatment, scales="free_x") + theme_bw() + labs(title="Fungi ITS: Top 20 Genera")# Heatmap (Top 40)
plot_heatmap(prune_taxa(top40_ITS, ps_clean_ITS), method="NMDS", distance="bray",
sample.order="Treatment", taxa.label="Genus", taxa.order="Phylum",
low="#000033", high="#FF3300", na.value="white") + labs(title="Fungi ITS: Taxonomic Heatmap")# Core Microbiome
ps_ctrl_ITS <- subset_samples(ps_clean_ITS, Treatment == "Controlled")
core_taxa_ITS <- core_members(ps_ctrl_ITS, detection = 0.001, prevalence = 0.8)
if(length(core_taxa_ITS) > 0) {
cat("\n**ITS Core Taxa Detected:**\n")
htmltools::tagList(datatable(as.data.frame(tax_table(ps_clean_ITS)[core_taxa_ITS, ]), options = list(pageLength = 5, scrollX = TRUE)))
}##
## **ITS Core Taxa Detected:**
# DESeq2
ds2_ITS <- DESeq(phyloseq_to_deseq2(ps_clean_ITS, ~ TimePoint + Treatment), test="Wald", fitType="parametric", sfType="poscounts")
res_ITS <- results(ds2_ITS, contrast=c("Treatment", "Controlled", "Spontaneous"))
sigtab_ITS <- res_ITS[which(res_ITS$padj < 0.05), ]
if(nrow(sigtab_ITS) > 0) {
sig_df_ITS <- cbind(as(sigtab_ITS, "data.frame"), as(tax_table(ps_clean_ITS)[rownames(sigtab_ITS), ], "matrix"))
write_xlsx(sig_df_ITS, file.path(path, "Fungi_ITS_DESeq2.xlsx"))
cat("\n**ITS Significant DESeq2 Taxa:**\n")
htmltools::tagList(datatable(sig_df_ITS, options = list(pageLength = 5, scrollX = TRUE)))
}##
## **ITS Significant DESeq2 Taxa:**
# Annotated Volcano Plot
tax_df_ITS <- as.data.frame(tax_table(ps_clean_ITS))
volcano_data_ITS <- cbind(as.data.frame(res_ITS), tax_df_ITS[rownames(res_ITS), ])
volcano_data_ITS <- volcano_data_ITS[!is.na(volcano_data_ITS$padj), ]
volcano_data_ITS$Significance <- "Not Significant"
volcano_data_ITS$Significance[volcano_data_ITS$log2FoldChange > 1 & volcano_data_ITS$padj < 0.05] <- "Significantly Increased (Controlled)"
volcano_data_ITS$Significance[volcano_data_ITS$log2FoldChange < -1 & volcano_data_ITS$padj < 0.05] <- "Significantly Decreased (Spontaneous)"
volcano_data_ITS$Label <- NA
sig_rows_ITS <- volcano_data_ITS$Significance != "Not Significant"
volcano_data_ITS$Label[sig_rows_ITS] <- volcano_data_ITS$Genus[sig_rows_ITS]
volcano_data_ITS$Label[sig_rows_ITS] <- paste0(rownames(volcano_data_ITS)[sig_rows_ITS], " (", volcano_data_ITS$Genus[sig_rows_ITS], ")")
ggplot(volcano_data_ITS, aes(x = log2FoldChange, y = -log10(padj), color = Significance)) +
geom_point(alpha = 0.7, size = 2.5) +
scale_color_manual(values = c("Not Significant" = "grey80",
"Significantly Increased (Controlled)" = "#2CA02C",
"Significantly Decreased (Spontaneous)" = "#D62728")) +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "black") +
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
geom_text_repel(aes(label = Label), size = 3.5, max.overlaps = 20,
box.padding = 0.5, point.padding = 0.3, color = "black", show.legend = FALSE) +
theme_bw() +
labs(title = "ITS DESeq2 Volcano Plot: Fungal Shifts in Fermentation",
x = "Log2 Fold Change (Biological Magnitude)",
y = "-Log10 Adjusted p-value (Statistical Confidence)") +
theme(legend.position = "bottom")otu_mat_ITS <- as(otu_table(ps_clean_ITS), "matrix")
if(taxa_are_rows(ps_clean_ITS)){ otu_mat_ITS <- t(otu_mat_ITS) }
ind_val_ITS <- multipatt(otu_mat_ITS, sample_data(ps_clean_ITS)$Treatment, func = "IndVal.g", control = how(nperm=999))
ind_sum_ITS <- ind_val_ITS$sign %>% filter(p.value <= 0.05)
if(nrow(ind_sum_ITS) > 0) {
ind_final_ITS <- cbind(as.data.frame(tax_table(ps_clean_ITS)[rownames(ind_sum_ITS), ]), ind_sum_ITS)
write_xlsx(ind_final_ITS, file.path(path, "Fungi_ITS_Indicator.xlsx"))
htmltools::tagList(datatable(ind_final_ITS, options = list(pageLength = 5, scrollX = TRUE), caption = "ITS Indicator Species"))
} else {
cat("\n*No significant indicator species found for ITS.*\n")
}otu_mat_part <- as(otu_table(ps_clean_ITS), "matrix")
if(taxa_are_rows(ps_clean_ITS)){ otu_mat_part <- t(otu_mat_part) }
beta_part <- beta.pair.abund(otu_mat_part)
mean_turnover <- mean(as.matrix(beta_part$beta.bray.bal)[lower.tri(as.matrix(beta_part$beta.bray.bal))])
mean_nestedness <- mean(as.matrix(beta_part$beta.bray.gra)[lower.tri(as.matrix(beta_part$beta.bray.gra))])
mean_total <- mean_turnover + mean_nestedness
part_df <- data.frame(
Component = c("Turnover (Replacement)", "Nestedness (Loss/Gain)"),
Percentage = c((mean_turnover/mean_total)*100, (mean_nestedness/mean_total)*100)
)
ggplot(part_df, aes(x = "", y = Percentage, fill = Component)) +
geom_bar(stat = "identity", width = 0.5, color="black") +
coord_flip() + theme_minimal() +
scale_fill_manual(values=c("#E69F00", "#56B4E9")) +
labs(title = "Fungi ITS: Drivers of Community Dissimilarity", x = "", y = "% Contribution to Beta Diversity")top30_taxa_ITS <- names(sort(taxa_sums(ps_clean_ITS), decreasing=TRUE))[1:30]
ps_net_ITS <- prune_taxa(top30_taxa_ITS, ps_clean_ITS)
net_otu <- as(otu_table(ps_net_ITS), "matrix")
if(taxa_are_rows(ps_net_ITS)){ net_otu <- t(net_otu) }
colnames(net_otu) <- as.data.frame(tax_table(ps_net_ITS))$Genus
cor_res <- rcorr(as.matrix(net_otu), type="spearman")
R_mat <- cor_res$r
P_mat <- cor_res$P
edges <- melt(R_mat, varnames = c("Taxon_A", "Taxon_B"), value.name = "Correlation")
p_edges <- melt(P_mat, varnames = c("Taxon_A", "Taxon_B"), value.name = "p_value")
edges$p_value <- p_edges$p_value
edges_sig <- edges %>%
filter(Taxon_A != Taxon_B) %>%
filter(p_value < 0.05) %>%
filter(abs(Correlation) > 0.6)
if(nrow(edges_sig) > 0) {
net_graph <- graph_from_data_frame(edges_sig, directed = FALSE)
net_graph <- simplify(net_graph, edge.attr.comb = "first")
E(net_graph)$color <- ifelse(E(net_graph)$Correlation > 0, "#0072B2", "#D55E00")
E(net_graph)$weight <- abs(E(net_graph)$Correlation) * 8
set.seed(42)
plot(net_graph,
layout = layout_with_fr(net_graph),
vertex.size = 12,
vertex.label.cex = 0.8,
vertex.label.color = "black",
vertex.color = "lightblue",
edge.width = E(net_graph)$weight,
main = "Fungi ITS: Co-occurrence Network (Top 30 Genera)",
sub = "Blue = Positive Correlation | Red = Negative Correlation")
} else {
cat("\n*No significant strong correlations found among the top taxa.*\n")
}