Analysis of 16S sequencing data obtained from anaerobic digesters treating swine wastewater and lignocellulosic biomass. CNTRL: swine wastewater only; WW+SG: swine wastewater and switchgrass; WW+CS: swine wastewater and corn stover. The goal is to identify microbes that may be important for lignocellulose hydrolysis.
Load packages
library(dada2); packageVersion("dada2")
## [1] '1.16.0'
library(phyloseq); packageVersion("phyloseq")
## [1] '1.32.0'
library(Biostrings); packageVersion("Biostrings")
## [1] '2.56.0'
library(decontam); packageVersion("decontam")
## [1] '1.8.0'
library(vegan); packageVersion("vegan")
## [1] '2.5.7'
library(DESeq2); packageVersion("DESeq2")
## [1] '1.28.1'
library(ANCOMBC); packageVersion("ANCOMBC")
## [1] '1.1.3'
source("ancom_v2.1.R")
library(ggordiplots)
library(nlme)
library(ggplot2)
library(dplyr)
library(compositions)
library(ggfortify)
library(readxl)
library(RColorBrewer)
theme_set(theme_bw())
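The custom colour palettes used in the plots below (cbPalette, cbPalette2, cbPalette3, cbPalette4) were defined outside this document; a minimal sketch of how comparable palettes could be built with RColorBrewer (the original colour choices are assumptions here):
#hypothetical palette definitions - the original colours are not recorded in this document
cbPalette <- colorRampPalette(brewer.pal(12, "Paired"))(20) #phylum/order-level bar plots
cbPalette2 <- brewer.pal(8, "Dark2") #reactors and archaeal classes
cbPalette3 <- colorRampPalette(brewer.pal(12, "Paired"))(30) #class-level bar plots
cbPalette4 <- brewer.pal(8, "Set2") #co-digester reactors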
Reads were first checked for quality with FastQC, and leftover adapters were removed with BBDuk. The pipeline follows the DADA2 tutorial with slight modifications.
path <- "~/Google Drive/Research/DATA/16S analysis/fastq_clean"
list.files(path)
fnFs <- sort(list.files(path, pattern="_clean_R1.fastq", full.names = TRUE))
fnRs <- sort(list.files(path, pattern="_clean_R2.fastq", full.names = TRUE))
sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz"))
names(filtFs) <- sample.names
names(filtRs) <- sample.names
Quality profile forward
plotQualityProfile(fnFs[7:8])
Quality profile reverse
plotQualityProfile(fnRs[7:8])
Filter and trim: trim the first 20 bp and truncate at 290 bp for forward reads; trim the first 10 bp and truncate at 200 bp for reverse reads.
out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, trimLeft=c(20,10), truncLen=c(290,200),
maxN=0, maxEE=c(2,5), truncQ=2, rm.phix=TRUE,
compress=TRUE, multithread=TRUE)
errF <- learnErrors(filtFs, multithread=TRUE)
errR <- learnErrors(filtRs, multithread=TRUE)
Learn error rates: DADA2 fits a parametric error model unique to each amplicon dataset. From the documentation: errors are learned by alternating estimation of the error rates and inference of sample composition until they converge on a jointly consistent solution.
plotErrors(errF, nominalQ=TRUE)
Error rates look good: the red line shows the error rates expected under the nominal definition of the Q-score, and the black line shows the estimated error rates, which fit well (error rates drop with increasing quality).
Sample inference: apply the core sample inference algorithm to the filtered and trimmed data.
dadaFs <- dada(filtFs, err=errF, multithread=TRUE)
dadaRs <- dada(filtRs, err=errR, multithread=TRUE)
Merge paired reads: merge the forward and reverse reads.
mergers <- mergePairs(dadaFs, filtFs, dadaRs, filtRs, verbose=TRUE) #merge forward and reverse pairs
Construct sequence table
seqtab <- makeSequenceTable(mergers)
table(nchar(getSequences(seqtab)))
##
## 270 278 279 280 284 285 286 287 293 306 309 314 319
## 60 1 1 1 1 3 3 1 3 2 1 2 1
## 320 321 322 323 324 325 326 327 333 335 336 339 342
## 1 2 2 3 2 3 1 2 2 4 1 3 7
## 343 344 345 347 348 349 350 352 359 366 367 368 369
## 10 1 1 2 2 1 1 4 1 3 4 1 2
## 370 371 372 373 374 375 376 379 380 381 382 383 384
## 1 1 5 1 3 1 8 9 8 12 4 2 1
## 385 386 387 388 389 390 391 392 393 394 395 396 398
## 2 1 2 1 3 18 7 66 18 134 43 2 3
## 399 400 401 402 403 404 405 406 407 408 409 410 411
## 1 4 8 6 6 4 1 5 4 11 196 6744 7518
## 412 413 414 415 416 417 418 419 420 421 422 423 424
## 2416 4864 401 126 22 9 20 17 7 28 1047 113 21
## 425 426 427 428 429 430 431 432 433 434 435 436 437
## 17 5 64 74 621 3163 1962 784 114 659 10080 2175 156
## 438 439 440 441 442 443 445 446 447 448
## 11 2 3 1 8 3 2 1 6 1
Most sequences are between 410 and 436 bp, which is consistent with the expected amplicon length.
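If off-target lengths were a concern, sequences outside the expected length window could be removed before chimera checking; a sketch that was not applied in this analysis, with 400-440 bp as an assumed window:
seqtab.lenfilt <- seqtab[, nchar(colnames(seqtab)) %in% 400:440] #keep only amplicons of roughly the expected length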
Remove chimeras: chimeras are identified by determining whether a sequence can be exactly reconstructed by combining a left segment and a right segment from two or more abundant "parent" sequences. (Chimeras are artifact sequences formed when two biological sequences are incorrectly joined together, usually as a consequence of PCR.)
seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE) #remove chimeras
Fraction of reads retained after chimera removal:
sum(seqtab.nochim)/sum(seqtab)
## [1] 0.9019542
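Retention can also be checked per sample rather than overall; a quick optional check:
summary(rowSums(seqtab.nochim) / rowSums(seqtab)) #per-sample fraction of reads kept after chimera removal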
Track reads through pipeline:
getN <- function(x) sum(getUniques(x))
track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.nochim))
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim")
rownames(track) <- sample.names
track[7:12,]
## input filtered denoisedF denoisedR merged nonchim
## R1-022120-A 189690 138582 133118 136093 128772 116988
## R1-022120-B 226794 161407 154884 158442 149414 134179
## R1-022120-C 193742 140165 135468 137806 131601 120892
## R1-030220-A 140404 99006 94355 96820 91068 83743
## R1-030220-B 158358 115734 110783 113506 107025 98235
## R1-030220-C 187818 133261 128432 130980 124755 114158
Looks good; no single step caused a major loss of reads.
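The same table can be expressed as the fraction of raw reads surviving the whole pipeline; an optional check not shown in the original output:
round(track[, "nonchim"] / track[, "input"], 2) #per-sample fraction of input reads retained after filtering, denoising, merging, and chimera removal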
Assign taxonomy
taxa <- assignTaxonomy(seqtab.nochim, "silva_nr99_v138_train_set.fa.gz", multithread=TRUE)
taxa.print <- taxa
rownames(taxa.print) <- NULL
head(taxa.print)
## Kingdom Phylum Class Order
## [1,] "Bacteria" "Spirochaetota" "Spirochaetia" "Spirochaetales"
## [2,] "Bacteria" "Thermotogota" "Thermotogae" "Thermotogales"
## [3,] "Bacteria" "Bacteroidota" "Bacteroidia" "Sphingobacteriales"
## [4,] "Bacteria" "Bacteroidota" "Bacteroidia" "Sphingobacteriales"
## [5,] "Bacteria" "Firmicutes" "Clostridia" "Hungateiclostridiaceae"
## [6,] "Bacteria" "Firmicutes" "Clostridia" "Hungateiclostridiaceae"
## Family Genus
## [1,] "Spirochaetaceae" NA
## [2,] "Fervidobacteriaceae" "Fervidobacterium"
## [3,] "Lentimicrobiaceae" "Lentimicrobium"
## [4,] "Lentimicrobiaceae" "Lentimicrobium"
## [5,] "Ruminiclostridium" NA
## [6,] "Pseudobacteroides" NA
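Species-level assignment was not performed here; if desired it could be added with DADA2's addSpecies(), assuming the matching SILVA v138 species file is available (file name is an assumption):
taxa.sp <- addSpecies(taxa, "silva_species_assignment_v138.fa.gz") #optional exact-match species assignment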
Load metadata
sample_data <- read.csv("data_for_R.csv", header=TRUE, row.names = 1)
sample_data <- as.data.frame(sample_data)
sample_data$OLR <- as.factor(sample_data$OLR)
sample_data$Reactor <- as.factor(sample_data$Reactor)
sample_data$OLR_3 <- as.factor(sample_data$OLR_3)
sample_data$reactor_2 <- as.factor(sample_data$reactor_2)
head(sample_data)
Make phyloseq object
ASV <- otu_table(seqtab.nochim, taxa_are_rows = FALSE)
TAX <- tax_table(taxa)
META <- sample_data(sample_data)
ps <- phyloseq(ASV, TAX, META)
dna <- Biostrings::DNAStringSet(taxa_names(ps))
names(dna) <- taxa_names(ps)
ps <- merge_phyloseq(ps, dna)
taxa_names(ps) <- paste0("ASV", seq(ntaxa(ps)))
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7922 taxa and 96 samples ]
## sample_data() Sample Data: [ 96 samples by 31 sample variables ]
## tax_table() Taxonomy Table: [ 7922 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 7922 reference sequences ]
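Because the upstream denoising steps are slow, the assembled objects could optionally be cached at this point, for example:
saveRDS(seqtab.nochim, "seqtab_nochim.rds") #optional: cache the chimera-free sequence table
saveRDS(ps, "ps_raw.rds") #optional: cache the phyloseq object so earlier steps need not be re-run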
Identify contaminants with decontam's prevalence method: the prevalence (presence/absence across samples) of each sequence in the true samples is compared to its prevalence in the negative-control samples.
ps_nomock <- prune_samples(sample_data(ps)$Reactor != "MOCK", ps) #remove mock samples
sample_data(ps_nomock)$is.neg <- sample_data(ps_nomock)$Reactor == "NEGCONTROL" #set negative control samples
contamdf.prev <- isContaminant(ps_nomock, method="prevalence", neg="is.neg")
table(contamdf.prev$contaminant)
##
## FALSE TRUE
## 7847 75
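The taxonomy of the flagged ASVs can be inspected before they are removed; for example:
contam_asvs <- rownames(contamdf.prev)[contamdf.prev$contaminant] #names of ASVs flagged as contaminants
tax_table(ps_nomock)[contam_asvs, ] #their taxonomic assignments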
Remove contaminants from the dataset
ps.nocontam <- prune_taxa(!contamdf.prev$contaminant, ps)
Mock community
ps_MOCK <- prune_samples(sample_data(ps)$Reactor == "MOCK", ps)
mock_class <- tax_glom(ps_MOCK, taxrank="Class")
mock_class <- transform_sample_counts(mock_class, function(x)100* x / sum(x))
mock_class <- filter_taxa(mock_class, function(x) mean(x) > 0.05, TRUE)
OTU_mock <- otu_table(mock_class, taxa_are_rows=TRUE)
TAX_mock <- tax_table(mock_class)[,"Class"]
OTU_mock <- as.data.frame(OTU_mock)
OTU_mock <- t(OTU_mock)
TAX_mock <- as.data.frame(TAX_mock)
mock_class <- merge(OTU_mock, TAX_mock, by="row.names")
mock_class
mock_class_actual <- read.csv("mock_class.csv", header=TRUE)
mock_class_actual
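The observed and expected class-level compositions could be compared side by side by joining on the taxonomy column; a sketch assuming mock_class_actual also contains a "Class" column (its column names are not shown in this document):
merge(mock_class, mock_class_actual, by = "Class", all = TRUE) #observed vs. expected mock composition at the class level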
mock_order <- tax_glom(ps_MOCK, taxrank="Order")
mock_order <- transform_sample_counts(mock_order, function(x)100* x / sum(x))
mock_order = filter_taxa(mock_order, function(x) mean(x) > 0.05, TRUE)
OTU_mock.2 <- otu_table(mock_order, taxa_are_rows= TRUE)
TAX_mock.2 <- tax_table(mock_order)[,"Order"]
OTU_mock.2 <- as.data.frame(OTU_mock.2)
OTU_mock.2 <- t(OTU_mock.2)
TAX_mock.2 <- as.data.frame(TAX_mock.2)
mock_order <- merge(OTU_mock.2, TAX_mock.2, by="row.names")
mock_order
mock_order_actual <- read.csv("mock_order.csv", header=FALSE)
mock_order_actual
Remove mock and negative-control samples
ps.all <- prune_samples(sample_data(ps.nocontam)$Reactor != "MOCK", ps.nocontam)
ps.all <- prune_samples(sample_data(ps.all)$Reactor != "NEGCONTROL", ps.all)
ps.all_rel <- transform_sample_counts(ps.all, function(x) x/sum(x))
All samples, unmerged; the triplicates look consistent.
p_unmerged <- tax_glom(ps.all_rel, taxrank="Phylum")
p_unmerged <- plot_bar(p_unmerged, fill = "Phylum")
data <- p_unmerged$data
data$Phylum <- as.character(data$Phylum)
data$Phylum[data$Abundance < 0.01] <- "< 1% abund."
p_unmerged$data <- data
p_unmerged <- ggplot(data=data, aes(x=Sample, y=Abundance, fill=Phylum))
p_unmerged <- p_unmerged + geom_bar(aes(), stat="identity", position="stack", colour="black") + theme(axis.text.x = element_text(angle = 270))
p_unmerged
Merge replicates
ps_merged <- merge_samples(ps.all, "merge")
ps_merged <- prune_taxa(taxa_sums(ps_merged) > 1, ps_merged) #remove singletons
#fix merging turning factors into numbers
sample_data(ps_merged)$Sample <- sample_names(ps_merged)
#relative abundance; then keep only taxa with mean relative abundance > 1e-5
ps_merged.rel <- transform_sample_counts(ps_merged, function(x) x/sum(x))
ps_merged.filt <- filter_taxa(ps_merged.rel, function(x) mean(x) > 1e-5, TRUE)
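A quick sanity check on how many ASVs the abundance filter removes:
ntaxa(ps_merged.rel); ntaxa(ps_merged.filt) #number of taxa before and after the mean relative-abundance filter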
Bar plot: all samples, merged, phylum level
p_all <- tax_glom(ps_merged.rel, taxrank="Phylum")
p_all <- plot_bar(p_all, fill = "Phylum")
data <- p_all$data
data$Phylum <- as.character(data$Phylum)
data$Phylum[data$Abundance < 0.01] <- "< 1% abund."
data <- data %>%
mutate(OLR_3, OLR_3 = case_when(as.character(OLR_3) == "1"~"1_A", as.character(OLR_3) =="2"~"1_B", as.character(OLR_3) =="3"~"2_A", as.character(OLR_3) =="4"~"2_B", as.character(OLR_3) =="5"~"3_A", as.character(OLR_3) =="6"~"3_B", TRUE ~ as.character(OLR_3)))
data <- data %>%
mutate(reactor_2, reactor_2 = case_when(as.character(reactor_2) == "1"~"CNTRL", as.character(reactor_2) =="2"~"CS SOLIDS", as.character(reactor_2) =="3"~"SG SOLIDS", as.character(reactor_2) =="4"~"WW+CS", as.character(reactor_2) =="5"~"WW+SG", TRUE ~ as.character(reactor_2)))
data <- data %>%
mutate(reactor_2 = factor(reactor_2, levels = c("CNTRL", "WW+SG", "SG SOLIDS", "WW+CS", "CS SOLIDS")))
p_all$data <- data
p_all <- ggplot(data=data, aes(x=OLR_3, y=Abundance, fill=Phylum))
p_all <- p_all + geom_bar(aes(), stat="identity", position="stack", colour="black") + labs(title= "A)") + facet_wrap(~reactor_2, nrow=1) +
scale_fill_manual(values = cbPalette) + theme(axis.text.x = element_text(angle = 270)) + xlab("OLR") + theme(text = element_text(size =15))
p_all
Bar plot: all samples, merged, class level
p_all_class <- tax_glom(ps_merged.rel, taxrank="Class")
p_all_class <- plot_bar(p_all_class, fill = "Class")
data <- p_all_class$data
data$Class <- as.character(data$Class)
data$Class[data$Abundance < 0.01] <- "< 1% abund."
data <- data %>%
mutate(OLR_3, OLR_3 = case_when(as.character(OLR_3) == "1"~"1_A", as.character(OLR_3) =="2"~"1_B", as.character(OLR_3) =="3"~"2_A", as.character(OLR_3) =="4"~"2_B", as.character(OLR_3) =="5"~"3_A", as.character(OLR_3) =="6"~"3_B", TRUE ~ as.character(OLR_3)))
data <- data %>%
mutate(reactor_2, reactor_2 = case_when(as.character(reactor_2) == "1"~"CNTRL", as.character(reactor_2) =="2"~"CS SOLIDS", as.character(reactor_2) =="3"~"SG SOLIDS", as.character(reactor_2) =="4"~"WW+CS", as.character(reactor_2) =="5"~"WW+SG", TRUE ~ as.character(reactor_2)))
data <- data %>%
mutate(reactor_2 = factor(reactor_2, levels = c("CNTRL", "WW+SG", "SG SOLIDS", "WW+CS", "CS SOLIDS")))
p_all_class$data <- data
p_all_class <- ggplot(data=data, aes(x=OLR_3, y=Abundance, fill=Class))
p_all_class <- p_all_class + labs(title= "B)") + geom_bar(aes(), stat="identity", position="stack", colour="black") + facet_wrap(~reactor_2, nrow=1) +
scale_fill_manual(values = cbPalette3) + theme(axis.text.x = element_text(angle = 270)) + xlab("OLR") + theme(text = element_text(size =15)) + theme(plot.title = element_text(hjust = 0))
p_all_class
Bar plot: all samples, merged, order level
p_all_order <- tax_glom(ps_merged.rel, taxrank="Order")
p_all_order <- plot_bar(p_all_order, fill = "Order")
data <- p_all_order$data
data$Order <- as.character(data$Order)
data$Order[data$Abundance < 0.01] <- "< 1% abund."
data <- data %>%
mutate(OLR_3, OLR_3 = case_when(as.character(OLR_3) == "1"~"OLR1_1", as.character(OLR_3) =="2"~"OLR1_2", as.character(OLR_3) =="3"~"OLR2_1", as.character(OLR_3) =="4"~"OLR2_2", as.character(OLR_3) =="5"~"OLR3_1", as.character(OLR_3) =="6"~"OLR3_2", TRUE ~ as.character(OLR_3)))
data <- data %>%
mutate(reactor_2, reactor_2 = case_when(as.character(reactor_2) == "1"~"CNTRL", as.character(reactor_2) =="2"~"CS SOLIDS", as.character(reactor_2) =="3"~"SG SOLIDS", as.character(reactor_2) =="4"~"WW+CS", as.character(reactor_2) =="5"~"WW+SG", TRUE ~ as.character(reactor_2)))
data <- data %>%
mutate(reactor_2 = factor(reactor_2, levels = c("CNTRL", "WW+SG", "SG SOLIDS", "WW+CS", "CS SOLIDS")))
p_all_order$data <- data
p_all_order <- ggplot(data=data, aes(x=OLR_3, y=Abundance, fill=Order))
p_all_order <- p_all_order + geom_bar(aes(), stat="identity", position="stack", colour="black") + facet_wrap(~Reactor, nrow=1) +
scale_fill_manual(values = cbPalette) + theme(axis.text.x = element_text(angle = 270))
p_all_order
Archaea (methanogens)
ps_methano <- subset_taxa(ps_merged, Kingdom == "Archaea")
ps_methano_rel <- transform_sample_counts(ps_methano, function(x) x/sum(x))
p_methano <- tax_glom(ps_methano_rel, taxrank="Class")
p_methano <- plot_bar(p_methano, fill = "Class")
data <- p_methano$data
data <- data %>%
mutate(OLR_3, OLR_3 = case_when(as.character(OLR_3) == "1"~"OLR1_1", as.character(OLR_3) =="2"~"OLR1_2", as.character(OLR_3) =="3"~"OLR2_1", as.character(OLR_3) =="4"~"OLR2_2", as.character(OLR_3) =="5"~"OLR3_1", as.character(OLR_3) =="6"~"OLR3_2", TRUE ~ as.character(OLR_3)))
data <- data %>%
mutate(reactor_2, reactor_2 = case_when(as.character(reactor_2) == "1"~"CNTRL", as.character(reactor_2) =="2"~"CS SOLIDS", as.character(reactor_2) =="3"~"SG SOLIDS", as.character(reactor_2) =="4"~"WW+CS", as.character(reactor_2) =="5"~"WW+SG", TRUE ~ as.character(reactor_2)))
data <- data %>%
mutate(reactor_2 = factor(reactor_2, levels = c("CNTRL", "WW+SG", "SG SOLIDS", "WW+CS", "CS SOLIDS")))
p_methano$data <- data
p_methano <- ggplot(data=data, aes(x=OLR_3, y=Abundance, fill=Class))
p_methano <- p_methano + geom_bar(aes(), stat="identity", position="stack", colour="black") + facet_wrap(~reactor_2, nrow=1) +
scale_fill_manual(values = cbPalette2) + theme(axis.text.x = element_text(angle = 270))
p_methano
CLR-transform the dataset to normalize: each count is divided by the geometric mean of all features in its sample and then log-transformed. Ratio transformations capture the relationships between features in the dataset, and the ratios are the same whether the data are counts or proportions. The log-ratio transformation makes the data symmetric and linearly related and places them in a log-ratio coordinate space (Gloor et al., 2017).
ps.clr <- prune_samples(sample_data(ps.nocontam)$Reactor != "MOCK", ps.nocontam)
ps.clr <- prune_samples(sample_data(ps.clr)$Reactor != "NEGCONTROL", ps.clr)
asv.onlysamples <- as(otu_table(ps.clr), "matrix")
asv.onlysamples = as.data.frame(asv.onlysamples)
clr <- clr(asv.onlysamples)
clr <- as.data.frame(clr)
min(clr) #-3.37
## [1] -3.370143
clr <- clr + 4 #shift all values to be positive, since Bray-Curtis dissimilarity requires non-negative data
NMDS using Bray-Curtis dissimilarity on the CLR-transformed data
ord1 <- metaMDS(clr, distance = "bray", trymax=99, autotransform = FALSE)
The stress plot shows a good fit between the ordination distances and the observed dissimilarities.
stressplot(ord1)
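The numeric stress value can also be reported directly (values below roughly 0.2 are generally considered an acceptable fit):
ord1$stress #NMDS stress for the full-sample ordination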
p1_ord1 <- plot_ordination(ps.clr, ord1, type="samples", color="Reactor", shape="OLR")
data <- p1_ord1$data
data <- data %>%
mutate(Reactor, Reactor = case_when(as.character(Reactor) == "1"~"CNTRL", as.character(Reactor) =="2"~"WW+SG", as.character(Reactor) =="2S"~"SG SOLIDS", as.character(Reactor) =="3"~"WW+CS", as.character(Reactor) =="3S"~"CS SOLIDS", TRUE ~ as.character(Reactor)))
data <- data %>%
mutate(Reactor = factor(Reactor, levels = c("CNTRL", "WW+SG", "SG SOLIDS", "WW+CS", "CS SOLIDS")))
p1_ord1$data <- data
p1_ord1 <- p1_ord1 + geom_point(size=4) +
theme(text = element_text(size =15)) +
scale_color_manual(values = cbPalette2) + labs(title= "A)")
p1_ord1
Same for co-digester samples only
ps.co <- prune_samples(sample_data(ps.clr)$Reactor != "1", ps.clr) #remove control samples
asv.onlysamples <- as(otu_table(ps.co), "matrix")
asv.onlysamples = as.data.frame(asv.onlysamples)
clr <- clr(asv.onlysamples)
clr <- as.data.frame(clr)
min(clr)
## [1] -3.370143
clr <- clr + 4
ord2 <- metaMDS(clr, distance = "bray", trymax=99, autotransform = FALSE)
The stress plot again shows a good fit of the data.
stressplot(ord2)
p1_ord2 <- plot_ordination(ps.co, ord2, type="samples", color="Reactor", shape="OLR")
data <- p1_ord2$data
data <- data %>%
mutate(Reactor, Reactor = case_when(as.character(Reactor) =="2"~"WW+SG", as.character(Reactor) =="2S"~"SG SOLIDS", as.character(Reactor) =="3"~"WW+CS", as.character(Reactor) =="3S"~"CS SOLIDS", TRUE ~ as.character(Reactor)))
data <- data %>%
mutate(Reactor = factor(Reactor, levels = c("WW+SG", "SG SOLIDS", "WW+CS", "CS SOLIDS")))
p1_ord2$data <- data
p1_ord2 <- p1_ord2 + geom_point(size=4) +
theme(text = element_text(size =15)) +
scale_color_manual(values = cbPalette4) + labs(title= "B)")
p1_ord2
ANCOM (analysis of composition of microbiomes): pairwise tests on the log-ratios of ASV abundances (Mandal et al., 2015).
#### WW+SG vs CNTRL
#filter out unwanted samples
ps.SG.CNTRL <- prune_samples(sample_data(ps.all)$Reactor != "3S", ps.all)
ps.SG.CNTRL <- prune_samples(sample_data(ps.SG.CNTRL)$Reactor != "3", ps.SG.CNTRL)
ps.SG.CNTRL <- prune_samples(sample_data(ps.SG.CNTRL)$Reactor != "2S", ps.SG.CNTRL)
asv.SG.CNTRL <- t(as(otu_table(ps.SG.CNTRL), "matrix")) #format asv matrix
meta.R2.1 <- read.csv("R2_1_meta.csv", header=TRUE) #import metadata
meta_data <- (meta.R2.1)
feature_table = asv.SG.CNTRL; sample_var = "X.NAME"; group_var = NULL #sample_var= column in the metadata table containing the sample names; group_var is not used here
out_cut = 0.05; zero_cut = 0.90; lib_cut = 0; neg_lb = FALSE #out_cut= taxa with a mixture-distribution proportion below out_cut are treated as outlier zeros, and above 1 - out_cut as outlier values; zero_cut= taxa with a proportion of zeros greater than zero_cut are excluded; lib_cut= samples with library size below lib_cut are excluded (set to 0 here to keep all samples)
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var,
out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info
main_var = "Reactor"; p_adj_method = "BH"; alpha = 0.05 ; adj_formula = "OLR" ; rand_formula=NULL #adj_formula- account for differences in organic loading rate
res = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)
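The ANCOM output can then be screened for ASVs that pass a detection threshold; a sketch assuming the output columns produced by the ancom_v2.1.R script (taxa_id, W, detected_0.7, etc.):
sig_SG_CNTRL <- subset(res$out, detected_0.7 == TRUE) #ASVs called differentially abundant at the 0.7 threshold (column name is an assumption)
head(sig_SG_CNTRL)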
#### WW+CS vs CNTRL
ps.CS.CNTRL <- prune_samples(sample_data(ps.all)$Reactor != "2S", ps.all)
ps.CS.CNTRL <- prune_samples(sample_data(ps.CS.CNTRL)$Reactor != "2", ps.CS.CNTRL)
ps.CS.CNTRL <- prune_samples(sample_data(ps.CS.CNTRL)$Reactor != "3S", ps.CS.CNTRL)
asv.CS.CNTRL <- t(as(otu_table(ps.CS.CNTRL), "matrix")) #format asv matrix
meta.R3.1 <- read.csv("R3_1_meta.csv", header=TRUE) #import metadata
meta_data <- (meta.R3.1)
feature_table = asv.CS.CNTRL; sample_var = "X.NAME"; group_var = NULL #sample_var= column in the metadata table containing the sample names; group_var is not used here
out_cut = 0.05; zero_cut = 0.90; lib_cut = 0; neg_lb = FALSE #same cutoffs as above; neg_lb= whether the asymptotic lower bound is used to classify taxa as structural zeros (FALSE here)
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var,
out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info
main_var = "Reactor"; p_adj_method = "BH"; alpha = 0.05 ; adj_formula = "OLR" ; rand_formula=NULL
res = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)
#### WW+SG vs SG SOLIDS
ps.SG.SOLIDS <- prune_samples(sample_data(ps.all)$Reactor != "1", ps.all)
ps.SG.SOLIDS <- prune_samples(sample_data(ps.SG.SOLIDS)$Reactor != "3S", ps.SG.SOLIDS)
ps.SG.SOLIDS <- prune_samples(sample_data(ps.SG.SOLIDS)$Reactor != "3", ps.SG.SOLIDS)
asv.SG.SOLIDS <- t(as(otu_table(ps.SG.SOLIDS), "matrix"))
meta.R2 <- read.csv("R2_meta.csv", header=TRUE)
meta_data <- (meta.R2)
feature_table = asv.SG.SOLIDS; sample_var = "X.NAME"; group_var = NULL
out_cut = 0.05; zero_cut = 0.90; lib_cut = 0; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var,
out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info
main_var = "Reactor"; p_adj_method = "BH"; alpha = 0.05 ; adj_formula = "OLR" ; rand_formula=NULL
res = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)
head(res$out)
#### WW+CS vs CS SOLIDS
ps.CS.SOLIDS <- prune_samples(sample_data(ps.all)$Reactor != "1", ps.all)
ps.CS.SOLIDS <- prune_samples(sample_data(ps.CS.SOLIDS)$Reactor != "2S", ps.CS.SOLIDS)
ps.CS.SOLIDS <- prune_samples(sample_data(ps.CS.SOLIDS)$Reactor != "2", ps.CS.SOLIDS)
asv.CS.SOLIDS <- t(as(otu_table(ps.CS.SOLIDS), "matrix"))
meta.R3 <- read.csv("R3_meta.csv", header=TRUE)
meta_data <- (meta.R3)
feature_table = asv.CS.SOLIDS; sample_var = "X.NAME"; group_var = NULL ;
out_cut = 0.05; zero_cut = 0.90; lib_cut = 0; neg_lb = FALSE
prepro = feature_table_pre_process(feature_table, meta_data, sample_var, group_var,
out_cut, zero_cut, lib_cut, neg_lb)
feature_table = prepro$feature_table # Preprocessed feature table
meta_data = prepro$meta_data # Preprocessed metadata
struc_zero = prepro$structure_zeros # Structural zero info
main_var = "Reactor"; p_adj_method = "BH"; alpha = 0.05 ; adj_formula = "OLR" ; rand_formula=NULL
res = ANCOM(feature_table, meta_data, struc_zero, main_var, p_adj_method,
alpha, adj_formula, rand_formula)
head(res$out)
ANCOM-BC (analysis of compositions of microbiomes with bias correction). Corrects the bias introduced by unknown sampling fractions by adding a sample-specific offset term to a linear regression framework fit to log-scale counts (Lin and Peddada, 2020).
#### WW+SG vs CNTRL
out = ancombc(phyloseq=ps.SG.CNTRL, formula= "Reactor + OLR", p_adj_method = "holm", zero_cut = 0.90, lib_cut = 0, group= NULL, struc_zero = FALSE, neg_lb= FALSE, tol = 1e-5, max_iter=100, alpha=0.05, global= FALSE)
res= out$res
diff_abn <- res$diff_abn
head(diff_abn)
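Flagged ASVs can be mapped back to the taxonomy table; a sketch in which the column name for the reactor coefficient ("Reactor2") depends on the factor coding and is an assumption here:
sig_asvs <- rownames(diff_abn)[diff_abn[, "Reactor2"] == TRUE] #ASVs flagged as differentially abundant between WW+SG and CNTRL (column name assumed)
tax_table(ps.SG.CNTRL)[sig_asvs, ] #taxonomy of the flagged ASVs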
#### WW+CS vs CNTRL
out = ancombc(phyloseq=ps.CS.CNTRL, formula= "Reactor + OLR", p_adj_method = "holm", zero_cut = 0.90, lib_cut = 0, group= NULL, struc_zero = FALSE, neg_lb= FALSE, tol = 1e-5, max_iter=100, alpha=0.05, global= FALSE)
res= out$res
diff_abn <- res$diff_abn
head(diff_abn)
#### WW+SG vs SG SOLIDS
out = ancombc(phyloseq=ps.SG.SOLIDS, formula= "Reactor + OLR", p_adj_method = "holm", zero_cut = 0.90, lib_cut = 0, group= NULL, struc_zero = FALSE, neg_lb= FALSE, tol = 1e-5, max_iter=100, alpha=0.05, global= FALSE)
res= out$res
diff_abn.2 <- res$diff_abn
head(diff_abn.2)
#### WW+CS vs CS SOLIDS
out = ancombc(phyloseq=ps.CS.SOLIDS, formula= "Reactor + OLR", p_adj_method = "holm", zero_cut = 0.90, lib_cut = 0, group= NULL, struc_zero = FALSE, neg_lb= FALSE, tol = 1e-5, max_iter=100, alpha=0.05, global= FALSE)
res= out$res
diff_abn.3 <- res$diff_abn
head(diff_abn.3)