Custom scripts
Before analyzing, run the following codes: ## Turn OTU table into
matrix
otu.matrix <- function(ps, force.taxa.cols = TRUE) {
mat <- as(phyloseq::otu_table(ps), "matrix")
if (taxa_are_rows(ps) & force.taxa.cols) {
mat <- t(mat)
}
return(mat)
}
Scripts of ggrare
ggrare <- function(physeq, step = 10, label = NULL, color = NULL,
plot = TRUE, parallel = FALSE, se = TRUE) {
require("ggplot2")
x <- as(phyloseq::otu_table(physeq), "matrix")
if (phyloseq::taxa_are_rows(physeq)) x <- t(x)
## This script is adapted from vegan `rarecurve` function
tot <- rowSums(x)
S <- rowSums(x > 0)
nr <- nrow(x)
rarefun <- function(i) {
cat(paste("rarefying sample", rownames(x)[i]), sep = "\n")
n <- seq(1, tot[i], by = step)
if (n[length(n)] != tot[i]) {
n <- c(n, tot[i])
}
y <- vegan::rarefy(x[i, ,drop = FALSE], n, se = se)
if (nrow(y) != 1) {
rownames(y) <- c(".S", ".se")
return(data.frame(t(y), Size = n, Sample = rownames(x)[i]))
} else {
return(data.frame(.S = y[1, ], Size = n, Sample = rownames(x)[i]))
}
}
if (parallel) {
out <- parallel::mclapply(seq_len(nr), rarefun, mc.preschedule = FALSE)
} else {
out <- lapply(seq_len(nr), rarefun)
}
df <- do.call(rbind, out)
## Get sample data
if (!is.null(phyloseq::sample_data(physeq, FALSE))) {
sdf <- as(phyloseq::sample_data(physeq), "data.frame")
sdf$Sample <- rownames(sdf)
data <- merge(df, sdf, by = "Sample")
labels <- data.frame(x = tot, y = S, Sample = rownames(x))
labels <- merge(labels, sdf, by = "Sample")
}
## Add, any custom-supplied plot-mapped variables
if (length(color) > 1) {
data$color <- color
names(data)[names(data) == "color"] <- deparse(substitute(color))
color <- deparse(substitute(color))
}
if (length(label) > 1) {
labels$label <- label
names(labels)[names(labels) == "label"] <- deparse(substitute(label))
label <- deparse(substitute(label))
}
p <- ggplot(data = data,
aes_string(x = "Size", y = ".S",
group = "Sample", color = color)) +
labs(x = "Sample Size", y = "Species Richness")
if (!is.null(label)) {
p <- p + geom_text(data = labels,
aes_string(x = "x", y = "y",
label = label, color = color),
size = 4, hjust = 0)
}
p <- p + geom_line()
if (se) { ## add standard error if available
p <- p +
geom_ribbon(aes_string(ymin = ".S - .se", ymax = ".S + .se",
color = NULL, fill = color),
alpha = 0.2)
}
if (plot) {
plot(p)
}
invisible(p)
}
#' Check for normal distribution of alpha diversity measurements
#'
#'
#' @return Print ggplot rarefaction curves
#' @param rich A dataframe with alpha diversity indices as columns
#' and samples as rows as the output of pyloseq::estimate_richness()
#' @param file Output file name
indices_normality <- function(rich, nrow, ncol) {
### p-value < 0.05 means data failed normality test
par(mfrow = c(nrow, ncol))
for (i in names(rich)) {
shap <- shapiro.test(rich[, i])
qqnorm(rich[, i], main = i, sub = shap$p.value)
qqline(rich[, i])
}
par(mfrow = c(1, 1))
}
Script for ancombc
run_ancombc_patched <- function(
ps,
group,
confounders = character(0),
contrast = NULL,
taxa_rank = "all",
transform = c("identity", "log10", "log10p"),
norm = "none",
norm_para = list(),
p_adjust = c(
"none", "fdr", "bonferroni", "holm",
"hochberg", "hommel", "BH", "BY"
),
prv_cut = 0.1,
lib_cut = 0,
struc_zero = FALSE,
neg_lb = FALSE,
tol = 1e-05,
max_iter = 100,
conserve = FALSE,
pvalue_cutoff = 0.05) {
stopifnot(inherits(ps, "phyloseq"))
ps <- microbiomeMarker:::check_rank_names(ps) |>
microbiomeMarker:::check_taxa_rank(taxa_rank)
if (length(confounders)) {
confounders <- microbiomeMarker:::check_confounder(ps, group, confounders)
}
# if it contains missing values for any
# variable specified in the formula, the corresponding sampling fraction
# estimate for this sample will return NA since the sampling fraction is
# not estimable with the presence of missing values.
# remove this samples
fml_char <- ifelse(
length(confounders),
paste(c(confounders, group), collapse = " + "),
group
)
# fml_char <- paste(c(confounders, group), collapse = " + ")
# fml <- stats::as.formula(paste("~", fml_char))
# vars_fml <- all.vars(fml)
for (var in c(confounders, group)) {
ps <- microbiomeMarker:::remove_na_samples(ps, var)
}
# check whether group is valid, write a function
meta <- phyloseq::sample_data(ps)
groups <- meta[[group]]
groups <- make.names(groups)
if (!is.null(contrast)) {
contrast <- make.names(contrast)
}
if (!is.factor(groups)) {
groups <- factor(groups)
}
groups <- microbiomeMarker:::set_lvl(groups, contrast)
phyloseq::sample_data(ps)[[group]] <- groups
lvl <- levels(groups)
n_lvl <- length(lvl)
contrast <- microbiomeMarker:::check_contrast(contrast)
transform <- match.arg(transform, c("identity", "log10", "log10p"))
p_adjust <- match.arg(
p_adjust,
c(
"none", "fdr", "bonferroni", "holm",
"hochberg", "hommel", "BH", "BY"
)
)
# set the reference level for pair-wise comparison from mutliple groups
# if (!is.null(contrast) && n_lvl > 2) {
# groups <- relevel(groups, ref = contrast[1])
# sample_data(ps)[[group]] <- groups
# }
# preprocess phyloseq object
ps <- microbiomeMarker:::preprocess_ps(ps)
ps <- microbiomeMarker::transform_abundances(ps, transform = transform)
# normalize the data
norm_para <- c(norm_para, method = norm, object = list(ps))
ps_normed <- do.call(microbiomeMarker::normalize, norm_para)
ps_summarized <- microbiomeMarker:::pre_ps_taxa_rank(ps_normed, taxa_rank)
global <- ifelse(n_lvl > 2, TRUE, FALSE)
# ancombc differential abundance analysis
if (taxa_rank == "all") {
ancombc_taxa_rank <- phyloseq::rank_names(ps_summarized)[1]
} else if(taxa_rank == "none") {
ancombc_taxa_rank <- NULL
} else {
ancombc_taxa_rank <- taxa_rank
}
ancombc_out <- ANCOMBC::ancombc(
ps_summarized,
tax_level = ancombc_taxa_rank,
formula = fml_char,
p_adj_method = p_adjust,
prv_cut = prv_cut,
lib_cut = lib_cut,
group = group,
struc_zero = struc_zero,
neg_lb = neg_lb,
tol = tol,
max_iter = max_iter,
conserve = conserve,
alpha = pvalue_cutoff,
global = global
)
# multiple-group comparison will be performed while the group
# variable has > 2 levels
keep_var <- c("W", "p_val", "q_val", "diff_abn")
if (n_lvl > 2) {
# ANCOM-BC global test to determine taxa that are differentially
# abundant between three or more groups of multiple samples.
# global result to marker_table
if (is.null(contrast)) {
mtab <- ancombc_out$res_global
} else {
exp_lvl <- paste0(group, contrast[2])
ancombc_res <- ancombc_out$res
mtab <- lapply(keep_var, function(x) ancombc_res[[x]][exp_lvl])
mtab <- do.call(cbind, mtab)
}
} else {
ancombc_out_res <- ancombc_out$res
# drop intercept
ancombc_out_res <- lapply(
ancombc_out_res,
function(x) data.frame(x, row.names = "taxon")[-1])
mtab <- do.call(
cbind,
ancombc_out_res[c("W", "p_val", "q_val", "diff_abn")]
)
}
names(mtab) <- keep_var
# determine enrich group based on coefficients
# drop intercept
cf <- data.frame(ancombc_out$res$lfc, row.names = "taxon")[-1]
if (n_lvl > 2) {
if (!is.null(contrast)) {
cf <- cf[exp_lvl]
enrich_group <- ifelse(cf[[1]] > 0, contrast[2], contrast[1])
} else {
cf <- cbind(0, cf)
enrich_group <- lvl[apply(cf, 1, which.max)]
}
} else {
enrich_group <- ifelse(cf[[1]] > 0, lvl[2], lvl[1])
}
# # enriched group
enrich_abd <- microbiomeMarker:::get_ancombc_enrich_group(ps_summarized, ancombc_out, group)
norm_abd <- enrich_abd$abd
mtab <- cbind(feature = row.names(mtab), mtab, enrich_group)
mtab_sig <- mtab[mtab$diff_abn, ]
mtab_sig <- mtab_sig[c("feature", "enrich_group", "W", "p_val", "q_val")]
names(mtab_sig) <- c("feature", "enrich_group", "ef_W", "pvalue", "padj")
marker <- microbiomeMarker:::return_marker(mtab_sig, mtab)
marker <- microbiomeMarker::microbiomeMarker(
marker_table = marker,
norm_method = microbiomeMarker:::get_norm_method(norm),
diff_method = "ancombc",
sam_data = phyloseq::sample_data(ps_summarized),
otu_table = phyloseq::otu_table(norm_abd, taxa_are_rows = TRUE),
tax_table = phyloseq::tax_table(ps_summarized)
)
marker
}
Colors list (make your own)
library(RColorBrewer)
# Define the number of colors from each palette
num_colors <- 100
custom_colors <- colorRampPalette(brewer.pal(8, "Paired"))(num_colors)
custom_colors2 <- c( "#CBD588", "#5F7FC7","#DA5724", "#508578", "#CD9BCD",
"#AD6F3B", "#673770","#D14285", "#652926", "#C84248",
"#8569D5", "#5E738F","#D1A33D", "#8A7C64", "#599861", "black", "red", "green")
Main analysis
library("seqinr")
library(Biostrings)
library(dplyr)
library(phyloseq)
library(writexl)
library(tidyr)
library(ggplot2)
library(readr)
library(ggpubr)
library(reshape)
Load context
Importing ASV table of Nem
asv_table <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/asv_table.tsv", sep="\t", row.names=1, header=T)
Invert the ASV table in case sample IDs are on row not column
asv_inverted<-as.data.frame(t(asv_table))
Load the ASV sequences (previously created with DADA2)
asv_seq <- readDNAStringSet(file = "/Users/qcvp/R/outputs/dada2_total/asv_table/asv.fasta",
format = "fasta",
nrec = -1L,
skip = 0L,
seek.first.rec = FALSE,
use.names = TRUE)
head(asv_seq, 1)
## DNAStringSet object of length 1:
## width seq names
## [1] 370 TACGGAAGGTCCGGGCGTTATCC...TGGGGAGTACGCCGGCAACGGTG ASV1
Load asv tax of Nem
asv_tax <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/taxonomy.tsv", header = TRUE, row.names = 1, sep="\t")
asv_tax <- as.matrix(asv_tax)
head(asv_tax, 1)
## Kingdom Phylum Class Order Family
## ASV1 "Bacteria" "Bacteroidota" "Bacteroidia" "Bacteroidales" "Prevotellaceae"
## Genus
## ASV1 "Prevotella_9"
Making raw phyloseq objects
physeq_Nem_transect <- phyloseq(
otu_table(asv_inverted, taxa_are_rows = F), #if you have taxa on rows = T
sample_data(context),
refseq(asv_seq),
tax_table(asv_tax)
)
physeq_Nem_transect
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 111454 taxa and 412 samples ]
## sample_data() Sample Data: [ 412 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 111454 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 111454 reference sequences ]
Decontamination process
Remove MOCK and Control samples from the phyloseq object
physeq_Nem_transect_1 <- subset_samples(physeq_Nem_transect, !Biosample_type =="Mock")
physeq_Nem_transect_1
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 111454 taxa and 410 samples ]
## sample_data() Sample Data: [ 410 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 111454 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 111454 reference sequences ]
physeq_Nem_transect_2 <- subset_samples(physeq_Nem_transect_1, Biosample != "Control")
physeq_Nem_transect_2
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 111454 taxa and 403 samples ]
## sample_data() Sample Data: [ 403 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 111454 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 111454 reference sequences ]
Quick checking library size of each Biomsaple types
df <- as.data.frame(sample_data(physeq_Nem_transect_2)) # Put sample_data into a ggplot-friendly data.frame
df$LibrarySize <- sample_sums(physeq_Nem_transect_2)
df <- df[order(df$LibrarySize),]
df$Index <- seq(nrow(df))
ggplot(data=df, aes(x=Index, y=LibrarySize, color=as.factor(Biosample))) +
geom_point() + geom_text(aes(label=Index), hjust=2, nudge_y = 0.05)

Identification of contaminants - prevalence
Remove comtaminant ASVs
physeq_Nem_transect_3 <- subset_taxa(physeq_Nem_transect_2, !taxa_names(physeq_Nem_transect) %in% c("ASV2", "ASV8", "ASV52", "ASV97", "ASV128", "ASV318", "ASV356", "ASV471", "ASV708", "ASV914", "ASV1101", "ASV1127", "ASV1915", "ASV1952", "ASV2892", "ASV3517", "ASV4859", "ASV9739", "ASV26448", "ASV30253"))
physeq_Nem_transect_3
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 111434 taxa and 403 samples ]
## sample_data() Sample Data: [ 403 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 111434 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 111434 reference sequences ]
Known kit Contaminant detection
contaminant_genera <- read.table("/Users/qcvp/R/Contaminant.txt", header = T)
a <- contaminant_genera[, ]
physeq_Nem_transect_4 <- subset_taxa(physeq_Nem_transect_3, !Genus %in% a)
physeq_Nem_transect_4
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 109183 taxa and 403 samples ]
## sample_data() Sample Data: [ 403 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 109183 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 109183 reference sequences ]
Checking data
#Tax
whole_bacteria_ASV_TAX <- data.frame(physeq_Nem_transect@tax_table)
ID <- rownames(whole_bacteria_ASV_TAX)
whole_bacteria_ASV_TAX <- cbind(ID=ID, whole_bacteria_ASV_TAX)
#Dataset with Relative abundance
physeq_Nem_transect_rel <- transform_sample_counts(physeq_Nem_transect, function(x) x / sum(x) )
rowSums(physeq_Nem_transect_rel@otu_table@.Data)
## BD.S.1.M BD.S.2.M BD.S.3.M BD.S.4.B BD.S.5.B
## 1 1 1 1 1
## BD.S.6.B BD.W.1.M BD.W.2.M BD.W.3.M BD.W.4.B
## 1 1 1 1 1
## BD.W.5.B BD.W.6.B Bds1b Bds2b Bds3b
## 1 1 1 1 1
## Bdw1b Bdw2b Bdw3b BioFilm.21.1 BioFilm.21.2
## 1 1 1 1 1
## BioFilm.21.3 CR.S.1.M CR.S.2.M CR.S.3.M CR.S.4.B
## 1 1 1 1 1
## CR.S.5.B CR.S.6.B CR.W.1.M CR.W.2.M CR.W.3.M
## 1 1 1 1 1
## CR.W.4.B CR.W.5.B CR.W.6.B F1s1b F1s2b
## 1 1 1 1 1
## F1s3b F1w1b F1w2b F1w3b F2s1b
## 1 1 1 1 1
## F2s2b F2s3b F2w1b F2w2b F2w3b
## 1 1 1 1 1
## F3s1b F3s2b F3s3b F3w1b F3w2b
## 1 1 1 1 1
## F3w3b F4s1b F4s2b F4s3b F4w1b
## 1 1 1 1 1
## F4w2b F4w3b Fi.10.3.GM Fi.10.4.GB Fi.10.5.GB
## 1 1 1 1 1
## Fi.10.6.GB Fi.12.10.GB Fi.12.11.GB Fi.12.13.GB Fi.12.4.GB
## 1 1 1 1 1
## Fi.12.5.GB Fi.12.6.GB Fi.12.7.GB Fi.12.8.GB Fi.12.9.GB
## 1 1 1 1 1
## Fi.13.1.GM Fi.13.2.GB Fi.13.3.GB Fi.14.1.GM Fi.14.2.GM
## 1 1 1 1 1
## Fi.14.5.GB Fi.14.6.GB Fi.17.8.GM Fi.18.1.GB Fi.18.2.GB
## 1 1 1 1 1
## Fi.18.3.GB Fi.19.1.GB Fi.20.1.GB Fi.20.2.GB Fi.3.6.GB
## 1 1 1 1 1
## Fi.3.7.GB Fi.4.1.GB Fi.4.3.GM Fi.4.4.GB Fi.5.3.GB
## 1 1 NaN 1 1
## Fi.5.4.GB Fi.5.5.GB Fi.6.4.GB Fi.6.5.GB Fi.6.6.GM
## 1 1 1 1 1
## Fi.6.7.GB Fi.6.8.GB Fi.6.9.GB Fi10.1gb Fi10.2gb
## 1 1 1 1 1
## Fi11.1gb Fi11.2gb Fi12.1gb Fi12.2gb Fi12.3gb
## 1 1 1 1 1
## Fi3.1gb Fi3.2gb Fi3.3gb Fi3.4gb Fi3.5gb
## 1 1 1 1 1
## Fi5.1gb Fi5.2gb Fi6.1gb Fi6.2gb Fi6.3gb
## 1 1 1 1 1
## Fi7.1gb Fi7.2gb Fi7.3gb Fi7.4gb Fi7.5gb
## 1 1 1 1 1
## Fo.10.3.BM Fo.10.4.BM Fo.15.1.BM Fo.15.2.BM Fo.15.3.BM
## 1 1 1 1 1
## Fo.17.1.BM Fo.17.2.BM Fo.17.3.BM Fo.17.5.BM Fo.4.1.BM
## 1 1 1 1 1
## Fo.4.2.BM Fo.4.3.BM Fo.4.4.BM Fo.4.5.BM HM.S.1.M
## 1 1 1 1 1
## HM.S.2.M HM.S.3.M HM.S.4.B HM.S.5.B HM.S.6.B
## 1 1 1 1 1
## HM.W.1.M HM.W.2.M HM.W.3.M HM.W.4.B HM.W.5.B
## 1 1 1 1 1
## HM.W.6.B Hu.18.GM Hu.18.SM Hu.19.GM Hu.19.SM
## 1 1 1 1 1
## Hu.20.GM Hu.20.SM Hu.21.GM Hu.21.SM Hu.22.GM
## 1 1 1 1 1
## Hu.22.SM Hu.23.GM Hu.23.SM Hu.25.GM Hu.25.SM
## 1 1 1 1 1
## Hu.26.GM Hu.26.SM Hu.27.GM Hu.27.SM Hu.28.GM
## 1 1 1 1 1
## Hu.28.SM Hu.29.GM Hu.29.SM Hu.30.GM Hu.30.SM
## 1 1 1 1 1
## Hu.31.GM Hu.31.SM Hu.32.GM Hu.32.SM Hu.33.GM
## 1 1 1 1 1
## Hu.33.SM Hu.34.GM Hu.34.SM Hu.35.GM Hu.35.SM
## 1 1 1 1 1
## Hu.36.GM Hu.36.SM Hu.37.GM Hu.37.SM Hu.38.GM
## 1 1 1 1 1
## Hu.38.SM Hu.39.GM Hu.39.SM Hu.40.SM Hu.41.GM
## 1 1 1 1 1
## Hu.41.SM Hu.43.GM Hu.43.SM Hu.44.GM Hu.44.SM
## 1 1 1 1 1
## Hu.45.GB Hu.45.SB Hu.46.GB Hu.46.SB Hu.47.GB
## 1 1 1 1 1
## Hu.47.SB Hu.48.GB Hu.48.SB Hu.49.GB Hu.49.SB
## 1 1 1 1 1
## Hu.50.GB Hu.50.SB Hu.51.GB Hu.51.SB Hu.52.GB
## 1 1 1 1 1
## Hu.52.SB Hu.53.GB Hu.53.SB Hu.54.GB Hu.54.SB
## 1 1 1 1 1
## Hu.55.GB Hu.55.SB Hu.56.GB Hu.56.SB Hu.57.GB
## 1 1 1 1 1
## Hu.57.SB Hu.58.GB Hu.58.SB Hu.59.GB Hu.59.SB
## 1 1 1 1 1
## Hu.60.GB Hu.60.SB Hu.61.GB Hu.61.SB Hu.63.GB
## 1 1 1 1 1
## Hu.63.SB Hu.64.GB Hu.64.SB Hu.66.GB Hu.66.SB
## 1 1 1 1 1
## Hu.67.GB Hu.67.SB Hu.68.GB Hu.68.SB Hu.69.GB
## 1 1 1 1 1
## Hu.69.SB Hu.70.GB Hu.70.SB Hu.71.GB Hu.71.SB
## 1 1 1 1 1
## Hu10.1.biof.6 Hu10.1gb Hu10.1sb Hu10.2.biof.6 Hu10.3.biof.6
## 1 1 1 1 1
## Hu11.1.biof.7 Hu11.1gb Hu11.1sb Hu11.2.biof.7 Hu11.3.biof.7
## 1 1 1 1 1
## Hu12.1.biof.8 Hu12.1gb Hu12.1sb Hu12.2.biof.8 Hu12.3.biof.8
## 1 1 1 1 1
## Hu13.1.biof.9 Hu13.1gb Hu13.1sb Hu13.2.biof.9 Hu13.3.biof.9
## 1 1 1 1 1
## Hu14.1gb Hu14.1sb Hu15.1.biof.11 Hu15.2.biof.11 Hu15.3.biof.11
## 1 1 1 1 1
## Hu15.3gb Hu15.3sb Hu16.1.biof.12 Hu16.2.biof.12 Hu16.3.biof.12
## 1 1 1 1 1
## Hu16.3gb Hu16.3sb Hu17.3gb Hu17.3sb Hu3.1gb
## 1 1 1 1 1
## Hu3.1sb Hu4.1gb Hu4.1sb Hu5.1.biof.3 Hu5.1gb
## 1 1 1 1 1
## Hu5.1sb Hu5.2.biof.3 Hu5.3.biof.3 Hu6.1gb Hu6.1sb
## 1 1 1 1 1
## Hu7.1.biof.4 Hu7.1gb Hu7.1sb Hu7.2.biof.4 Hu7.3.biof.4
## 1 1 1 1 1
## Hu8.1.biof.5 Hu8.1gb Hu8.1sb Hu8.2.biof.5 Hu8.3.biof.5
## 1 1 1 1 1
## Hu9.1gb Hu9.1sb Lo.2.4.GB Lo.2.5.GB Lo.2.6.GB
## 1 1 1 1 1
## Lo.2.7.GB Lo.2.8.GB Lo.2.9.GB Lo.5.1.GM Lo.5.3.GB
## 1 1 1 1 1
## Lo.5.4.GB Lo.5.5.GB Lo.9.1.GB Lo.9.2.GB Lo2.1gb
## 1 1 1 1 1
## Lo2.2gb Lo2.3gb Lo4.1gb Lo4.2gb Lo8.1gb
## 1 1 1 1 1
## Lo8.2gb Lo8.3gb Lo8.4gb Lo8.5gb Mock.control
## 1 1 1 1 1
## Mock.control.b Nega.control.3 Nega.control.3b Nega.control.4 NP.S.1.M
## 1 1 1 1 1
## NP.S.2.M NP.S.3.M NP.S.4.B NP.S.5.B NP.S.6.B
## 1 1 1 1 1
## NP.W.1.M NP.W.2.M NP.W.3.M NP.W.4.B NP.W.5.B
## 1 1 1 1 1
## NP.W.6.B Oy.13.2.GB Oy.13.3.GM Oy.13.7.GB Oy.15.4.GM
## 1 1 1 1 1
## Oy.15.5.GM Oy.15.6.GB Oy.15.7.GB Oy.15.8.GB PCR.caouanem
## 1 1 1 1 1
## PCR.p1.dec23 PCR.p1.jul23 PCR.p2.dec23 R1s1b R1s2b
## 1 1 1 1 1
## R1s3b R1w1b R1w2b R1w3b R2s1b
## 1 1 1 1 1
## R2s2b R2s3b R2w1b R2w2b R2w3b
## 1 1 1 1 1
## R3s1b R3s2b R3s3b R3w1b R3w2b
## 1 1 1 1 1
## R3w3b R4s1b R4s2b R4s3b R4w1b
## 1 1 1 1 1
## R4w2b R4w3b Rivs1b Rivs2b Rivs3b
## 1 1 1 1 1
## Rivw1b Rivw2b Rivw3b Sq.11.1.GM Sq.11.2.GM
## 1 1 1 1 1
## Sq.11.3.GB Sq.11.4.GB Sq.11.5.GB U1s1b U1s2b
## 1 1 1 1 1
## U1s3b U1w1b U1w2b U1w3b U2s1b
## 1 1 1 1 1
## U2s2b U2s3b U2w1b U2w2b U2w3b
## 1 1 1 1 1
## U3s1b U3s2b U3s3b U3w1b U3w2b
## 1 1 1 1 1
## U3w3b U4s1b U4s2b U4s3b U4w1b
## 1 1 1 1 1
## U4w2b U4w3b
## 1 1
Plotting bar plot of reads
# Extract row sum of physeq_Nem_transect_3
OTU_asmatrix_3 <- otu.matrix(physeq_Nem_transect_3, force.taxa.cols = TRUE)
sample_reads_3 <- rowSums(OTU_asmatrix_3)
# Extract row sum of physeq_Nem_transect_4
OTU_asmatrix_4 <- otu.matrix(physeq_Nem_transect_4, force.taxa.cols = TRUE)
sample_reads_4 <- rowSums(OTU_asmatrix_4)
#Combine
sample_reads_total <- cbind(sample_reads_4, sample_reads_3)
# Load data
Abs_abun_arg <- sample_reads_total
# Reshape the data to long format
Abs_abun_arg_long <- melt(Abs_abun_arg, id.vars = "Sample.name", variable.name = "Runs", value.name = "Reads")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
Biosample_type <- physeq_Nem_transect_4@sam_data$Biosample_type
Abs_abun_arg_long <- cbind( Abs_abun_arg_long, Biosample_type)
#Calculate errors
Abs_abun_summary <- Abs_abun_arg_long %>%
group_by(Biosample_type, X2) %>%
summarise(
Mean = mean(value, na.rm = TRUE),
SD = sd(value, na.rm = TRUE))
Abs_abun_summary
## # A tibble: 12 × 4
## # Groups: Biosample_type [6]
## Biosample_type X2 Mean SD
## <chr> <fct> <dbl> <dbl>
## 1 Biofilm sample_reads_4 44370. 15625.
## 2 Biofilm sample_reads_3 48793. 12809.
## 3 Gut sample_reads_4 40302. 17142.
## 4 Gut sample_reads_3 41953. 17625.
## 5 Sediment sample_reads_4 31652. 12182.
## 6 Sediment sample_reads_3 31951. 12768.
## 7 Skin sample_reads_4 41408. 17116.
## 8 Skin sample_reads_3 46206. 17859.
## 9 Water sample_reads_4 45188. 10212.
## 10 Water sample_reads_3 45539. 10277.
## 11 Wholebody sample_reads_4 19216. 19883.
## 12 Wholebody sample_reads_3 20532. 20549.
# Plot using ggplot2
# Plot for sediment data
ggplot(Abs_abun_summary, aes(x = Biosample_type, y = Mean, fill = X2)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = Mean-SD, ymax = Mean+SD), width = 0.2, position = position_dodge(0.9)) +
theme_minimal() +
theme(legend.text = element_text(size = 15),
legend.position = "top",
plot.title = element_text(size = 20, face = "bold"),
legend.title = element_text(size = 18),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 15),
strip.text = element_text(size = 15),
axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
labs(x = "biosample type", y = "number of read") +
facet_grid( scales = "free")

Removing samples with < 5000 reads
physeq_Nem_transect_5 <- subset_samples(physeq_Nem_transect_4, !sample_names(physeq_Nem_transect_4) %in% c("Fi.4.3.GM", "Hu.35.SM","Sq.11.3.GB", "Fi.4.1.GB", "Hu.40.SM", "Oy.13.3.GM", "Fi.14.1.GM", "Hu.33.SM", "Hu.34.SM", "Hu.32.SM", "Fo.10.4.BM", "Fo.10.3.BM", "Fo.17.3.BM", "Fo.15.3.BM", "Fo.17.2.BM", "Oy.13.2.GB", "Fo.17.1.BM", "Fo.15.1.BM", "Fi.17.8.GM", "Fo.15.2.BM", "Oy.15.7.GB", "Lo.2.6.GB", "Lo.9.1.GB", "Lo.5.3.GB"))
physeq_Nem_transect_5
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 109183 taxa and 379 samples ]
## sample_data() Sample Data: [ 379 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 109183 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 109183 reference sequences ]
max(rowSums(physeq_Nem_transect_5@otu_table))
## [1] 79551
min(rowSums(physeq_Nem_transect_5@otu_table))
## [1] 6723
mean(rowSums(physeq_Nem_transect_5@otu_table))
## [1] 41402.22
Rarefy at minimum sample size (for Alpha diversity analysis)
ps_Nem_rarefy <- phyloseq::rarefy_even_depth(physeq_Nem_transect_5, rngseed = TRUE, replace = FALSE, sample.size = min(rowSums(physeq_Nem_transect_5@otu_table)))
#Check rarefied curves
p01 <- ggrare(ps_Nem_rarefy, step = 100, color = "Biosample_type", se = TRUE)
## rarefying sample BD.S.1.M
## rarefying sample BD.S.2.M
## rarefying sample BD.S.3.M
## rarefying sample BD.S.4.B
## rarefying sample BD.S.5.B
## rarefying sample BD.S.6.B
## rarefying sample BD.W.1.M
## rarefying sample BD.W.2.M
## rarefying sample BD.W.3.M
## rarefying sample BD.W.4.B
## rarefying sample BD.W.5.B
## rarefying sample BD.W.6.B
## rarefying sample Bds1b
## rarefying sample Bds2b
## rarefying sample Bds3b
## rarefying sample Bdw1b
## rarefying sample Bdw2b
## rarefying sample Bdw3b
## rarefying sample BioFilm.21.1
## rarefying sample BioFilm.21.2
## rarefying sample BioFilm.21.3
## rarefying sample CR.S.1.M
## rarefying sample CR.S.2.M
## rarefying sample CR.S.3.M
## rarefying sample CR.S.4.B
## rarefying sample CR.S.5.B
## rarefying sample CR.S.6.B
## rarefying sample CR.W.1.M
## rarefying sample CR.W.2.M
## rarefying sample CR.W.3.M
## rarefying sample CR.W.4.B
## rarefying sample CR.W.5.B
## rarefying sample CR.W.6.B
## rarefying sample F1s1b
## rarefying sample F1s2b
## rarefying sample F1s3b
## rarefying sample F1w1b
## rarefying sample F1w2b
## rarefying sample F1w3b
## rarefying sample F2s1b
## rarefying sample F2s2b
## rarefying sample F2s3b
## rarefying sample F2w1b
## rarefying sample F2w2b
## rarefying sample F2w3b
## rarefying sample F3s1b
## rarefying sample F3s2b
## rarefying sample F3s3b
## rarefying sample F3w1b
## rarefying sample F3w2b
## rarefying sample F3w3b
## rarefying sample F4s1b
## rarefying sample F4s2b
## rarefying sample F4s3b
## rarefying sample F4w1b
## rarefying sample F4w2b
## rarefying sample F4w3b
## rarefying sample Fi.10.3.GM
## rarefying sample Fi.10.4.GB
## rarefying sample Fi.10.5.GB
## rarefying sample Fi.10.6.GB
## rarefying sample Fi.12.10.GB
## rarefying sample Fi.12.11.GB
## rarefying sample Fi.12.13.GB
## rarefying sample Fi.12.4.GB
## rarefying sample Fi.12.5.GB
## rarefying sample Fi.12.6.GB
## rarefying sample Fi.12.7.GB
## rarefying sample Fi.12.8.GB
## rarefying sample Fi.12.9.GB
## rarefying sample Fi.13.1.GM
## rarefying sample Fi.13.2.GB
## rarefying sample Fi.13.3.GB
## rarefying sample Fi.14.2.GM
## rarefying sample Fi.14.5.GB
## rarefying sample Fi.14.6.GB
## rarefying sample Fi.18.1.GB
## rarefying sample Fi.18.2.GB
## rarefying sample Fi.18.3.GB
## rarefying sample Fi.19.1.GB
## rarefying sample Fi.20.1.GB
## rarefying sample Fi.20.2.GB
## rarefying sample Fi.3.6.GB
## rarefying sample Fi.3.7.GB
## rarefying sample Fi.4.4.GB
## rarefying sample Fi.5.3.GB
## rarefying sample Fi.5.4.GB
## rarefying sample Fi.5.5.GB
## rarefying sample Fi.6.4.GB
## rarefying sample Fi.6.5.GB
## rarefying sample Fi.6.6.GM
## rarefying sample Fi.6.7.GB
## rarefying sample Fi.6.8.GB
## rarefying sample Fi.6.9.GB
## rarefying sample Fi10.1gb
## rarefying sample Fi10.2gb
## rarefying sample Fi11.1gb
## rarefying sample Fi11.2gb
## rarefying sample Fi12.1gb
## rarefying sample Fi12.2gb
## rarefying sample Fi12.3gb
## rarefying sample Fi3.1gb
## rarefying sample Fi3.2gb
## rarefying sample Fi3.3gb
## rarefying sample Fi3.4gb
## rarefying sample Fi3.5gb
## rarefying sample Fi5.1gb
## rarefying sample Fi5.2gb
## rarefying sample Fi6.1gb
## rarefying sample Fi6.2gb
## rarefying sample Fi6.3gb
## rarefying sample Fi7.1gb
## rarefying sample Fi7.2gb
## rarefying sample Fi7.3gb
## rarefying sample Fi7.4gb
## rarefying sample Fi7.5gb
## rarefying sample Fo.17.5.BM
## rarefying sample Fo.4.1.BM
## rarefying sample Fo.4.2.BM
## rarefying sample Fo.4.3.BM
## rarefying sample Fo.4.4.BM
## rarefying sample Fo.4.5.BM
## rarefying sample HM.S.1.M
## rarefying sample HM.S.2.M
## rarefying sample HM.S.3.M
## rarefying sample HM.S.4.B
## rarefying sample HM.S.5.B
## rarefying sample HM.S.6.B
## rarefying sample HM.W.1.M
## rarefying sample HM.W.2.M
## rarefying sample HM.W.3.M
## rarefying sample HM.W.4.B
## rarefying sample HM.W.5.B
## rarefying sample HM.W.6.B
## rarefying sample Hu.18.GM
## rarefying sample Hu.18.SM
## rarefying sample Hu.19.GM
## rarefying sample Hu.19.SM
## rarefying sample Hu.20.GM
## rarefying sample Hu.20.SM
## rarefying sample Hu.21.GM
## rarefying sample Hu.21.SM
## rarefying sample Hu.22.GM
## rarefying sample Hu.22.SM
## rarefying sample Hu.23.GM
## rarefying sample Hu.23.SM
## rarefying sample Hu.25.GM
## rarefying sample Hu.25.SM
## rarefying sample Hu.26.GM
## rarefying sample Hu.26.SM
## rarefying sample Hu.27.GM
## rarefying sample Hu.27.SM
## rarefying sample Hu.28.GM
## rarefying sample Hu.28.SM
## rarefying sample Hu.29.GM
## rarefying sample Hu.29.SM
## rarefying sample Hu.30.GM
## rarefying sample Hu.30.SM
## rarefying sample Hu.31.GM
## rarefying sample Hu.31.SM
## rarefying sample Hu.32.GM
## rarefying sample Hu.33.GM
## rarefying sample Hu.34.GM
## rarefying sample Hu.35.GM
## rarefying sample Hu.36.GM
## rarefying sample Hu.36.SM
## rarefying sample Hu.37.GM
## rarefying sample Hu.37.SM
## rarefying sample Hu.38.GM
## rarefying sample Hu.38.SM
## rarefying sample Hu.39.GM
## rarefying sample Hu.39.SM
## rarefying sample Hu.41.GM
## rarefying sample Hu.41.SM
## rarefying sample Hu.43.GM
## rarefying sample Hu.43.SM
## rarefying sample Hu.44.GM
## rarefying sample Hu.44.SM
## rarefying sample Hu.45.GB
## rarefying sample Hu.45.SB
## rarefying sample Hu.46.GB
## rarefying sample Hu.46.SB
## rarefying sample Hu.47.GB
## rarefying sample Hu.47.SB
## rarefying sample Hu.48.GB
## rarefying sample Hu.48.SB
## rarefying sample Hu.49.GB
## rarefying sample Hu.49.SB
## rarefying sample Hu.50.GB
## rarefying sample Hu.50.SB
## rarefying sample Hu.51.GB
## rarefying sample Hu.51.SB
## rarefying sample Hu.52.GB
## rarefying sample Hu.52.SB
## rarefying sample Hu.53.GB
## rarefying sample Hu.53.SB
## rarefying sample Hu.54.GB
## rarefying sample Hu.54.SB
## rarefying sample Hu.55.GB
## rarefying sample Hu.55.SB
## rarefying sample Hu.56.GB
## rarefying sample Hu.56.SB
## rarefying sample Hu.57.GB
## rarefying sample Hu.57.SB
## rarefying sample Hu.58.GB
## rarefying sample Hu.58.SB
## rarefying sample Hu.59.GB
## rarefying sample Hu.59.SB
## rarefying sample Hu.60.GB
## rarefying sample Hu.60.SB
## rarefying sample Hu.61.GB
## rarefying sample Hu.61.SB
## rarefying sample Hu.63.GB
## rarefying sample Hu.63.SB
## rarefying sample Hu.64.GB
## rarefying sample Hu.64.SB
## rarefying sample Hu.66.GB
## rarefying sample Hu.66.SB
## rarefying sample Hu.67.GB
## rarefying sample Hu.67.SB
## rarefying sample Hu.68.GB
## rarefying sample Hu.68.SB
## rarefying sample Hu.69.GB
## rarefying sample Hu.69.SB
## rarefying sample Hu.70.GB
## rarefying sample Hu.70.SB
## rarefying sample Hu.71.GB
## rarefying sample Hu.71.SB
## rarefying sample Hu10.1.biof.6
## rarefying sample Hu10.1gb
## rarefying sample Hu10.1sb
## rarefying sample Hu10.2.biof.6
## rarefying sample Hu10.3.biof.6
## rarefying sample Hu11.1.biof.7
## rarefying sample Hu11.1gb
## rarefying sample Hu11.1sb
## rarefying sample Hu11.2.biof.7
## rarefying sample Hu11.3.biof.7
## rarefying sample Hu12.1.biof.8
## rarefying sample Hu12.1gb
## rarefying sample Hu12.1sb
## rarefying sample Hu12.2.biof.8
## rarefying sample Hu12.3.biof.8
## rarefying sample Hu13.1.biof.9
## rarefying sample Hu13.1gb
## rarefying sample Hu13.1sb
## rarefying sample Hu13.2.biof.9
## rarefying sample Hu13.3.biof.9
## rarefying sample Hu14.1gb
## rarefying sample Hu14.1sb
## rarefying sample Hu15.1.biof.11
## rarefying sample Hu15.2.biof.11
## rarefying sample Hu15.3.biof.11
## rarefying sample Hu15.3gb
## rarefying sample Hu15.3sb
## rarefying sample Hu16.1.biof.12
## rarefying sample Hu16.2.biof.12
## rarefying sample Hu16.3.biof.12
## rarefying sample Hu16.3gb
## rarefying sample Hu16.3sb
## rarefying sample Hu17.3gb
## rarefying sample Hu17.3sb
## rarefying sample Hu3.1gb
## rarefying sample Hu3.1sb
## rarefying sample Hu4.1gb
## rarefying sample Hu4.1sb
## rarefying sample Hu5.1.biof.3
## rarefying sample Hu5.1gb
## rarefying sample Hu5.1sb
## rarefying sample Hu5.2.biof.3
## rarefying sample Hu5.3.biof.3
## rarefying sample Hu6.1gb
## rarefying sample Hu6.1sb
## rarefying sample Hu7.1.biof.4
## rarefying sample Hu7.1gb
## rarefying sample Hu7.1sb
## rarefying sample Hu7.2.biof.4
## rarefying sample Hu7.3.biof.4
## rarefying sample Hu8.1.biof.5
## rarefying sample Hu8.1gb
## rarefying sample Hu8.1sb
## rarefying sample Hu8.2.biof.5
## rarefying sample Hu8.3.biof.5
## rarefying sample Hu9.1gb
## rarefying sample Hu9.1sb
## rarefying sample Lo.2.4.GB
## rarefying sample Lo.2.5.GB
## rarefying sample Lo.2.7.GB
## rarefying sample Lo.2.8.GB
## rarefying sample Lo.2.9.GB
## rarefying sample Lo.5.1.GM
## rarefying sample Lo.5.4.GB
## rarefying sample Lo.5.5.GB
## rarefying sample Lo.9.2.GB
## rarefying sample Lo2.1gb
## rarefying sample Lo2.2gb
## rarefying sample Lo2.3gb
## rarefying sample Lo4.1gb
## rarefying sample Lo4.2gb
## rarefying sample Lo8.1gb
## rarefying sample Lo8.2gb
## rarefying sample Lo8.3gb
## rarefying sample Lo8.4gb
## rarefying sample Lo8.5gb
## rarefying sample NP.S.1.M
## rarefying sample NP.S.2.M
## rarefying sample NP.S.3.M
## rarefying sample NP.S.4.B
## rarefying sample NP.S.5.B
## rarefying sample NP.S.6.B
## rarefying sample NP.W.1.M
## rarefying sample NP.W.2.M
## rarefying sample NP.W.3.M
## rarefying sample NP.W.4.B
## rarefying sample NP.W.5.B
## rarefying sample NP.W.6.B
## rarefying sample Oy.13.7.GB
## rarefying sample Oy.15.4.GM
## rarefying sample Oy.15.5.GM
## rarefying sample Oy.15.6.GB
## rarefying sample Oy.15.8.GB
## rarefying sample R1s1b
## rarefying sample R1s2b
## rarefying sample R1s3b
## rarefying sample R1w1b
## rarefying sample R1w2b
## rarefying sample R1w3b
## rarefying sample R2s1b
## rarefying sample R2s2b
## rarefying sample R2s3b
## rarefying sample R2w1b
## rarefying sample R2w2b
## rarefying sample R2w3b
## rarefying sample R3s1b
## rarefying sample R3s2b
## rarefying sample R3s3b
## rarefying sample R3w1b
## rarefying sample R3w2b
## rarefying sample R3w3b
## rarefying sample R4s1b
## rarefying sample R4s2b
## rarefying sample R4s3b
## rarefying sample R4w1b
## rarefying sample R4w2b
## rarefying sample R4w3b
## rarefying sample Rivs1b
## rarefying sample Rivs2b
## rarefying sample Rivs3b
## rarefying sample Rivw1b
## rarefying sample Rivw2b
## rarefying sample Rivw3b
## rarefying sample Sq.11.1.GM
## rarefying sample Sq.11.2.GM
## rarefying sample Sq.11.4.GB
## rarefying sample Sq.11.5.GB
## rarefying sample U1s1b
## rarefying sample U1s2b
## rarefying sample U1s3b
## rarefying sample U1w1b
## rarefying sample U1w2b
## rarefying sample U1w3b
## rarefying sample U2s1b
## rarefying sample U2s2b
## rarefying sample U2s3b
## rarefying sample U2w1b
## rarefying sample U2w2b
## rarefying sample U2w3b
## rarefying sample U3s1b
## rarefying sample U3s2b
## rarefying sample U3s3b
## rarefying sample U3w1b
## rarefying sample U3w2b
## rarefying sample U3w3b
## rarefying sample U4s1b
## rarefying sample U4s2b
## rarefying sample U4s3b
## rarefying sample U4w1b
## rarefying sample U4w2b
## rarefying sample U4w3b

#check Zhang & Huang's coverage estimator
library(entropart)
Coverage_estimator_rarefied <- Coverage(ps_Nem_rarefy@otu_table, Estimator = "Best", Level = NULL, CheckArguments = TRUE)
Coverage_estimator_rarefied
## ZhangHuang
## 0.9792839
#Remove unnecessary samples
#ps_Nem_rarefy <- subset_samples(ps_Nem_rarefy, !(sample_names(ps_Nem_rarefy) %in% c("R1w2b", "JCA.U2S2B", "U1w2b")))
#Check sample size
dim(ps_Nem_rarefy@otu_table)
## [1] 379 70796