Before analyzing, run the following codes: ## OTU to matrix
otu.matrix <- function(ps, force.taxa.cols = TRUE) {
mat <- as(phyloseq::otu_table(ps), "matrix")
if (taxa_are_rows(ps) & force.taxa.cols) {
mat <- t(mat)
}
return(mat)
}
ggrare
ggrare <- function(physeq, step = 10, label = NULL, color = NULL,
plot = TRUE, parallel = FALSE, se = TRUE) {
require("ggplot2")
x <- as(phyloseq::otu_table(physeq), "matrix")
if (phyloseq::taxa_are_rows(physeq)) x <- t(x)
## This script is adapted from vegan `rarecurve` function
tot <- rowSums(x)
S <- rowSums(x > 0)
nr <- nrow(x)
rarefun <- function(i) {
cat(paste("rarefying sample", rownames(x)[i]), sep = "\n")
n <- seq(1, tot[i], by = step)
if (n[length(n)] != tot[i]) {
n <- c(n, tot[i])
}
y <- vegan::rarefy(x[i, ,drop = FALSE], n, se = se)
if (nrow(y) != 1) {
rownames(y) <- c(".S", ".se")
return(data.frame(t(y), Size = n, Sample = rownames(x)[i]))
} else {
return(data.frame(.S = y[1, ], Size = n, Sample = rownames(x)[i]))
}
}
if (parallel) {
out <- parallel::mclapply(seq_len(nr), rarefun, mc.preschedule = FALSE)
} else {
out <- lapply(seq_len(nr), rarefun)
}
df <- do.call(rbind, out)
## Get sample data
if (!is.null(phyloseq::sample_data(physeq, FALSE))) {
sdf <- as(phyloseq::sample_data(physeq), "data.frame")
sdf$Sample <- rownames(sdf)
data <- merge(df, sdf, by = "Sample")
labels <- data.frame(x = tot, y = S, Sample = rownames(x))
labels <- merge(labels, sdf, by = "Sample")
}
## Add, any custom-supplied plot-mapped variables
if (length(color) > 1) {
data$color <- color
names(data)[names(data) == "color"] <- deparse(substitute(color))
color <- deparse(substitute(color))
}
if (length(label) > 1) {
labels$label <- label
names(labels)[names(labels) == "label"] <- deparse(substitute(label))
label <- deparse(substitute(label))
}
p <- ggplot(data = data,
aes_string(x = "Size", y = ".S",
group = "Sample", color = color)) +
labs(x = "Sample Size", y = "Species Richness")
if (!is.null(label)) {
p <- p + geom_text(data = labels,
aes_string(x = "x", y = "y",
label = label, color = color),
size = 4, hjust = 0)
}
p <- p + geom_line()
if (se) { ## add standard error if available
p <- p +
geom_ribbon(aes_string(ymin = ".S - .se", ymax = ".S + .se",
color = NULL, fill = color),
alpha = 0.2)
}
if (plot) {
plot(p)
}
invisible(p)
}
#' Check for normal distribution of alpha diversity measurements
#'
#'
#' @return Print ggplot rarefaction curves
#' @param rich A dataframe with alpha diversity indices as columns
#' and samples as rows as the output of pyloseq::estimate_richness()
#' @param file Output file name
indices_normality <- function(rich, nrow, ncol) {
### p-value < 0.05 means data failed normality test
par(mfrow = c(nrow, ncol))
for (i in names(rich)) {
shap <- shapiro.test(rich[, i])
qqnorm(rich[, i], main = i, sub = shap$p.value)
qqline(rich[, i])
}
par(mfrow = c(1, 1))
}
library("seqinr")
library(Biostrings)
library(dplyr)
library(phyloseq)
library(writexl)
library(tidyr)
library(ggplot2)
library(readr)
library(ggpubr)
library(reshape)
library(microbiomeMarker)
library(ANCOMBC)
library(tidyverse)
library(DT)
library(ggVennDiagram)
library(VennDiagram)
asv_table <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/asv_table.tsv", sep="\t", row.names=1, header=T)
Invert the ASV table in case sample IDs are on row not column
asv_inverted<-as.data.frame(t(asv_table))
asv_seq <- readDNAStringSet(file = "/Users/qcvp/R/outputs/dada2_total/asv_table/asv.fasta",
format = "fasta",
nrec = -1L,
skip = 0L,
seek.first.rec = FALSE,
use.names = TRUE)
head(asv_seq, 1)
## DNAStringSet object of length 1:
## width seq names
## [1] 370 TACGGAAGGTCCGGGCGTTATCC...TGGGGAGTACGCCGGCAACGGTG ASV1
asv_tax <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/taxonomy.tsv", header = TRUE, row.names = 1, sep="\t")
asv_tax <- as.matrix(asv_tax)
head(asv_tax, 1)
## Kingdom Phylum Class Order Family
## ASV1 "Bacteria" "Bacteroidota" "Bacteroidia" "Bacteroidales" "Prevotellaceae"
## Genus
## ASV1 "Prevotella_9"
context <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/metadata.txt", header = TRUE, row.names = 1, sep="\t")
rownames(context) <- gsub("-",".",rownames(context))
head(context)
## Sequencing Date Transect Station Biosample Biosample_type
## BD.S.1.M Metagenome Jul_2023 Bich_Dam BD Environment Sediment
## BD.S.2.M Metagenome Jul_2023 Bich_Dam BD Environment Sediment
## BD.S.3.M Metagenome Jul_2023 Bich_Dam BD Environment Sediment
## BD.S.4.B Barcoding Dec_2023 Bich_Dam BD Environment Sediment
## BD.S.5.B Barcoding Dec_2023 Bich_Dam BD Environment Sediment
## BD.S.6.B Barcoding Dec_2023 Bich_Dam BD Environment Sediment
## Biosample_cluster Host_type Host_subtype Host_genus Host_species
## BD.S.1.M Sediment Environment Sediment <NA> <NA>
## BD.S.2.M Sediment Environment Sediment <NA> <NA>
## BD.S.3.M Sediment Environment Sediment <NA> <NA>
## BD.S.4.B Sediment Environment Sediment <NA> <NA>
## BD.S.5.B Sediment Environment Sediment <NA> <NA>
## BD.S.6.B Sediment Environment Sediment <NA> <NA>
## Species_ID
## BD.S.1.M <NA>
## BD.S.2.M <NA>
## BD.S.3.M <NA>
## BD.S.4.B <NA>
## BD.S.5.B <NA>
## BD.S.6.B <NA>
#Checking samples IDs uniformity
setdiff(x = row.names(asv_inverted), y = row.names(context))
## character(0)
physeq_Nem <- phyloseq(
otu_table(asv_inverted, taxa_are_rows = F), #if you have taxa on rows = T
sample_data(context),
refseq(asv_seq),
tax_table(asv_tax)
)
physeq_Nem
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 111454 taxa and 412 samples ]
## sample_data() Sample Data: [ 412 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 111454 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 111454 reference sequences ]
Remove MOCK and Control samples from the phyloseq object
physeq_Nem_1 <- subset_samples(physeq_Nem, !Biosample =="Mock")
physeq_Nem_1
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 111454 taxa and 412 samples ]
## sample_data() Sample Data: [ 412 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 111454 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 111454 reference sequences ]
physeq_Nem_2 <- subset_samples(physeq_Nem_1, Biosample != "Control")
physeq_Nem_2
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 111454 taxa and 403 samples ]
## sample_data() Sample Data: [ 403 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 111454 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 111454 reference sequences ]
Quick checking library size of each Biomsaple types
df <- as.data.frame(sample_data(physeq_Nem_2)) # Put sample_data into a ggplot-friendly data.frame
df$LibrarySize <- sample_sums(physeq_Nem_2)
df <- df[order(df$LibrarySize),]
df$Index <- seq(nrow(df))
ggplot(data=df, aes(x=Index, y=LibrarySize, color=as.factor(Biosample))) +
geom_point() + geom_text(aes(label=Index), hjust=2, nudge_y = 0.05)
Remove comtaminant ASVs
physeq_Nem_3 <- subset_taxa(physeq_Nem_2, !taxa_names(physeq_Nem) %in% c("ASV2", "ASV8", "ASV52", "ASV97", "ASV128", "ASV318", "ASV356", "ASV471", "ASV708", "ASV914", "ASV1101", "ASV1127", "ASV1915", "ASV1952", "ASV2892", "ASV3517", "ASV4859", "ASV9739", "ASV26448", "ASV30253"))
physeq_Nem_3
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 111434 taxa and 403 samples ]
## sample_data() Sample Data: [ 403 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 111434 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 111434 reference sequences ]
contaminant_genera <- read.table("/Users/qcvp/R/Contaminant.txt", header = T)
a <- contaminant_genera[, ]
physeq_Nem_4 <- subset_taxa(physeq_Nem_3, !Genus %in% a)
physeq_Nem_4
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 109183 taxa and 403 samples ]
## sample_data() Sample Data: [ 403 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 109183 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 109183 reference sequences ]
Removing samples with < 5000 reads
physeq_Nem_5 <- subset_samples(physeq_Nem_4, !sample_names(physeq_Nem_4) %in% c("Fi.4.3.GM", "Hu.35.SM","Sq.11.3.GB", "Fi.4.1.GB", "Hu.40.SM", "Oy.13.3.GM", "Fi.14.1.GM", "Hu.33.SM", "Hu.34.SM", "Hu.32.SM", "Fo.10.4.BM", "Fo.10.3.BM", "Fo.17.3.BM", "Fo.15.3.BM", "Fo.17.2.BM", "Oy.13.2.GB", "Fo.17.1.BM", "Fo.15.1.BM", "Fi.17.8.GM", "Fo.15.2.BM", "Oy.15.7.GB", "Lo.9.1.GB", "Lo.5.3.GB", "Lo.2.6.GB"))
physeq_Nem_5
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 109183 taxa and 379 samples ]
## sample_data() Sample Data: [ 379 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 109183 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 109183 reference sequences ]
max(rowSums(physeq_Nem_5@otu_table))
## [1] 79551
min(rowSums(physeq_Nem_5@otu_table))
## [1] 6723
mean(rowSums(physeq_Nem_5@otu_table))
## [1] 41402.22
Plotting bar plot of reads
# Extract row sum of physeq_Nemt_3
OTU_asmatrix_3 <- otu.matrix(physeq_Nem_3, force.taxa.cols = TRUE)
sample_reads_3 <- rowSums(OTU_asmatrix_3)
# Extract row sum of physeq_Nem_4
OTU_asmatrix_4 <- otu.matrix(physeq_Nem_4, force.taxa.cols = TRUE)
sample_reads_4 <- rowSums(OTU_asmatrix_4)
#Combine
sample_reads_total <- cbind(sample_reads_4, sample_reads_3)
# Load data
Abs_abun_arg <- sample_reads_total
# Reshape the data to long format
Abs_abun_arg_long <- melt(Abs_abun_arg, id.vars = "Sample.name", variable.name = "Runs", value.name = "Reads")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
Biosample_type <- physeq_Nem_4@sam_data$Biosample_type
Abs_abun_arg_long <- cbind( Abs_abun_arg_long, Biosample_type)
#Calculate errors
Abs_abun_summary <- Abs_abun_arg_long %>%
group_by(Biosample_type, X2) %>%
summarise(
Mean = mean(value, na.rm = TRUE),
SD = sd(value, na.rm = TRUE))
Abs_abun_summary
## # A tibble: 12 × 4
## # Groups: Biosample_type [6]
## Biosample_type X2 Mean SD
## <chr> <fct> <dbl> <dbl>
## 1 Biofilm sample_reads_4 44370. 15625.
## 2 Biofilm sample_reads_3 48793. 12809.
## 3 Gut sample_reads_4 40302. 17142.
## 4 Gut sample_reads_3 41953. 17625.
## 5 Sediment sample_reads_4 31652. 12182.
## 6 Sediment sample_reads_3 31951. 12768.
## 7 Skin sample_reads_4 41408. 17116.
## 8 Skin sample_reads_3 46206. 17859.
## 9 Water sample_reads_4 45188. 10212.
## 10 Water sample_reads_3 45539. 10277.
## 11 Wholebody sample_reads_4 19216. 19883.
## 12 Wholebody sample_reads_3 20532. 20549.
# Plot using ggplot2
# Plot for sediment data
ggplot(Abs_abun_summary, aes(x = Biosample_type, y = Mean, fill = X2)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = Mean-SD, ymax = Mean+SD), width = 0.2, position = position_dodge(0.9)) +
theme_minimal() +
theme(legend.text = element_text(size = 15),
legend.position = "top",
plot.title = element_text(size = 20, face = "bold"),
legend.title = element_text(size = 18),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 18),
axis.text.y = element_text(size = 15),
strip.text = element_text(size = 15),
axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
labs(x = "biosample type", y = "number of read") +
facet_grid( scales = "free")
#Tax
whole_bacteria_ASV_TAX <- data.frame(physeq_Nem_5@tax_table)
ID <- rownames(whole_bacteria_ASV_TAX)
whole_bacteria_ASV_TAX <- cbind(ID=ID, whole_bacteria_ASV_TAX)
#Dataset with Relative abundance
physeq_Nem_rel <- transform_sample_counts(physeq_Nem_5, function(x) x / sum(x) )
rowSums(physeq_Nem_rel@otu_table@.Data)
## BD.S.1.M BD.S.2.M BD.S.3.M BD.S.4.B BD.S.5.B
## 1 1 1 1 1
## BD.S.6.B BD.W.1.M BD.W.2.M BD.W.3.M BD.W.4.B
## 1 1 1 1 1
## BD.W.5.B BD.W.6.B Bds1b Bds2b Bds3b
## 1 1 1 1 1
## Bdw1b Bdw2b Bdw3b BioFilm.21.1 BioFilm.21.2
## 1 1 1 1 1
## BioFilm.21.3 CR.S.1.M CR.S.2.M CR.S.3.M CR.S.4.B
## 1 1 1 1 1
## CR.S.5.B CR.S.6.B CR.W.1.M CR.W.2.M CR.W.3.M
## 1 1 1 1 1
## CR.W.4.B CR.W.5.B CR.W.6.B F1s1b F1s2b
## 1 1 1 1 1
## F1s3b F1w1b F1w2b F1w3b F2s1b
## 1 1 1 1 1
## F2s2b F2s3b F2w1b F2w2b F2w3b
## 1 1 1 1 1
## F3s1b F3s2b F3s3b F3w1b F3w2b
## 1 1 1 1 1
## F3w3b F4s1b F4s2b F4s3b F4w1b
## 1 1 1 1 1
## F4w2b F4w3b Fi.10.3.GM Fi.10.4.GB Fi.10.5.GB
## 1 1 1 1 1
## Fi.10.6.GB Fi.12.10.GB Fi.12.11.GB Fi.12.13.GB Fi.12.4.GB
## 1 1 1 1 1
## Fi.12.5.GB Fi.12.6.GB Fi.12.7.GB Fi.12.8.GB Fi.12.9.GB
## 1 1 1 1 1
## Fi.13.1.GM Fi.13.2.GB Fi.13.3.GB Fi.14.2.GM Fi.14.5.GB
## 1 1 1 1 1
## Fi.14.6.GB Fi.18.1.GB Fi.18.2.GB Fi.18.3.GB Fi.19.1.GB
## 1 1 1 1 1
## Fi.20.1.GB Fi.20.2.GB Fi.3.6.GB Fi.3.7.GB Fi.4.4.GB
## 1 1 1 1 1
## Fi.5.3.GB Fi.5.4.GB Fi.5.5.GB Fi.6.4.GB Fi.6.5.GB
## 1 1 1 1 1
## Fi.6.6.GM Fi.6.7.GB Fi.6.8.GB Fi.6.9.GB Fi10.1gb
## 1 1 1 1 1
## Fi10.2gb Fi11.1gb Fi11.2gb Fi12.1gb Fi12.2gb
## 1 1 1 1 1
## Fi12.3gb Fi3.1gb Fi3.2gb Fi3.3gb Fi3.4gb
## 1 1 1 1 1
## Fi3.5gb Fi5.1gb Fi5.2gb Fi6.1gb Fi6.2gb
## 1 1 1 1 1
## Fi6.3gb Fi7.1gb Fi7.2gb Fi7.3gb Fi7.4gb
## 1 1 1 1 1
## Fi7.5gb Fo.17.5.BM Fo.4.1.BM Fo.4.2.BM Fo.4.3.BM
## 1 1 1 1 1
## Fo.4.4.BM Fo.4.5.BM HM.S.1.M HM.S.2.M HM.S.3.M
## 1 1 1 1 1
## HM.S.4.B HM.S.5.B HM.S.6.B HM.W.1.M HM.W.2.M
## 1 1 1 1 1
## HM.W.3.M HM.W.4.B HM.W.5.B HM.W.6.B Hu.18.GM
## 1 1 1 1 1
## Hu.18.SM Hu.19.GM Hu.19.SM Hu.20.GM Hu.20.SM
## 1 1 1 1 1
## Hu.21.GM Hu.21.SM Hu.22.GM Hu.22.SM Hu.23.GM
## 1 1 1 1 1
## Hu.23.SM Hu.25.GM Hu.25.SM Hu.26.GM Hu.26.SM
## 1 1 1 1 1
## Hu.27.GM Hu.27.SM Hu.28.GM Hu.28.SM Hu.29.GM
## 1 1 1 1 1
## Hu.29.SM Hu.30.GM Hu.30.SM Hu.31.GM Hu.31.SM
## 1 1 1 1 1
## Hu.32.GM Hu.33.GM Hu.34.GM Hu.35.GM Hu.36.GM
## 1 1 1 1 1
## Hu.36.SM Hu.37.GM Hu.37.SM Hu.38.GM Hu.38.SM
## 1 1 1 1 1
## Hu.39.GM Hu.39.SM Hu.41.GM Hu.41.SM Hu.43.GM
## 1 1 1 1 1
## Hu.43.SM Hu.44.GM Hu.44.SM Hu.45.GB Hu.45.SB
## 1 1 1 1 1
## Hu.46.GB Hu.46.SB Hu.47.GB Hu.47.SB Hu.48.GB
## 1 1 1 1 1
## Hu.48.SB Hu.49.GB Hu.49.SB Hu.50.GB Hu.50.SB
## 1 1 1 1 1
## Hu.51.GB Hu.51.SB Hu.52.GB Hu.52.SB Hu.53.GB
## 1 1 1 1 1
## Hu.53.SB Hu.54.GB Hu.54.SB Hu.55.GB Hu.55.SB
## 1 1 1 1 1
## Hu.56.GB Hu.56.SB Hu.57.GB Hu.57.SB Hu.58.GB
## 1 1 1 1 1
## Hu.58.SB Hu.59.GB Hu.59.SB Hu.60.GB Hu.60.SB
## 1 1 1 1 1
## Hu.61.GB Hu.61.SB Hu.63.GB Hu.63.SB Hu.64.GB
## 1 1 1 1 1
## Hu.64.SB Hu.66.GB Hu.66.SB Hu.67.GB Hu.67.SB
## 1 1 1 1 1
## Hu.68.GB Hu.68.SB Hu.69.GB Hu.69.SB Hu.70.GB
## 1 1 1 1 1
## Hu.70.SB Hu.71.GB Hu.71.SB Hu10.1.biof.6 Hu10.1gb
## 1 1 1 1 1
## Hu10.1sb Hu10.2.biof.6 Hu10.3.biof.6 Hu11.1.biof.7 Hu11.1gb
## 1 1 1 1 1
## Hu11.1sb Hu11.2.biof.7 Hu11.3.biof.7 Hu12.1.biof.8 Hu12.1gb
## 1 1 1 1 1
## Hu12.1sb Hu12.2.biof.8 Hu12.3.biof.8 Hu13.1.biof.9 Hu13.1gb
## 1 1 1 1 1
## Hu13.1sb Hu13.2.biof.9 Hu13.3.biof.9 Hu14.1gb Hu14.1sb
## 1 1 1 1 1
## Hu15.1.biof.11 Hu15.2.biof.11 Hu15.3.biof.11 Hu15.3gb Hu15.3sb
## 1 1 1 1 1
## Hu16.1.biof.12 Hu16.2.biof.12 Hu16.3.biof.12 Hu16.3gb Hu16.3sb
## 1 1 1 1 1
## Hu17.3gb Hu17.3sb Hu3.1gb Hu3.1sb Hu4.1gb
## 1 1 1 1 1
## Hu4.1sb Hu5.1.biof.3 Hu5.1gb Hu5.1sb Hu5.2.biof.3
## 1 1 1 1 1
## Hu5.3.biof.3 Hu6.1gb Hu6.1sb Hu7.1.biof.4 Hu7.1gb
## 1 1 1 1 1
## Hu7.1sb Hu7.2.biof.4 Hu7.3.biof.4 Hu8.1.biof.5 Hu8.1gb
## 1 1 1 1 1
## Hu8.1sb Hu8.2.biof.5 Hu8.3.biof.5 Hu9.1gb Hu9.1sb
## 1 1 1 1 1
## Lo.2.4.GB Lo.2.5.GB Lo.2.7.GB Lo.2.8.GB Lo.2.9.GB
## 1 1 1 1 1
## Lo.5.1.GM Lo.5.4.GB Lo.5.5.GB Lo.9.2.GB Lo2.1gb
## 1 1 1 1 1
## Lo2.2gb Lo2.3gb Lo4.1gb Lo4.2gb Lo8.1gb
## 1 1 1 1 1
## Lo8.2gb Lo8.3gb Lo8.4gb Lo8.5gb NP.S.1.M
## 1 1 1 1 1
## NP.S.2.M NP.S.3.M NP.S.4.B NP.S.5.B NP.S.6.B
## 1 1 1 1 1
## NP.W.1.M NP.W.2.M NP.W.3.M NP.W.4.B NP.W.5.B
## 1 1 1 1 1
## NP.W.6.B Oy.13.7.GB Oy.15.4.GM Oy.15.5.GM Oy.15.6.GB
## 1 1 1 1 1
## Oy.15.8.GB R1s1b R1s2b R1s3b R1w1b
## 1 1 1 1 1
## R1w2b R1w3b R2s1b R2s2b R2s3b
## 1 1 1 1 1
## R2w1b R2w2b R2w3b R3s1b R3s2b
## 1 1 1 1 1
## R3s3b R3w1b R3w2b R3w3b R4s1b
## 1 1 1 1 1
## R4s2b R4s3b R4w1b R4w2b R4w3b
## 1 1 1 1 1
## Rivs1b Rivs2b Rivs3b Rivw1b Rivw2b
## 1 1 1 1 1
## Rivw3b Sq.11.1.GM Sq.11.2.GM Sq.11.4.GB Sq.11.5.GB
## 1 1 1 1 1
## U1s1b U1s2b U1s3b U1w1b U1w2b
## 1 1 1 1 1
## U1w3b U2s1b U2s2b U2s3b U2w1b
## 1 1 1 1 1
## U2w2b U2w3b U3s1b U3s2b U3s3b
## 1 1 1 1 1
## U3w1b U3w2b U3w3b U4s1b U4s2b
## 1 1 1 1 1
## U4s3b U4w1b U4w2b U4w3b
## 1 1 1 1
ps_Nem_rarefy <- phyloseq::rarefy_even_depth(physeq_Nem_5, rngseed = TRUE, replace = FALSE, sample.size = min(rowSums(physeq_Nem_5@otu_table)))
#Check rarefied curves
p01 <- ggrare(ps_Nem_rarefy, step = 100, color = "Biosample_type", se = TRUE)
## rarefying sample BD.S.1.M
## rarefying sample BD.S.2.M
## rarefying sample BD.S.3.M
## rarefying sample BD.S.4.B
## rarefying sample BD.S.5.B
## rarefying sample BD.S.6.B
## rarefying sample BD.W.1.M
## rarefying sample BD.W.2.M
## rarefying sample BD.W.3.M
## rarefying sample BD.W.4.B
## rarefying sample BD.W.5.B
## rarefying sample BD.W.6.B
## rarefying sample Bds1b
## rarefying sample Bds2b
## rarefying sample Bds3b
## rarefying sample Bdw1b
## rarefying sample Bdw2b
## rarefying sample Bdw3b
## rarefying sample BioFilm.21.1
## rarefying sample BioFilm.21.2
## rarefying sample BioFilm.21.3
## rarefying sample CR.S.1.M
## rarefying sample CR.S.2.M
## rarefying sample CR.S.3.M
## rarefying sample CR.S.4.B
## rarefying sample CR.S.5.B
## rarefying sample CR.S.6.B
## rarefying sample CR.W.1.M
## rarefying sample CR.W.2.M
## rarefying sample CR.W.3.M
## rarefying sample CR.W.4.B
## rarefying sample CR.W.5.B
## rarefying sample CR.W.6.B
## rarefying sample F1s1b
## rarefying sample F1s2b
## rarefying sample F1s3b
## rarefying sample F1w1b
## rarefying sample F1w2b
## rarefying sample F1w3b
## rarefying sample F2s1b
## rarefying sample F2s2b
## rarefying sample F2s3b
## rarefying sample F2w1b
## rarefying sample F2w2b
## rarefying sample F2w3b
## rarefying sample F3s1b
## rarefying sample F3s2b
## rarefying sample F3s3b
## rarefying sample F3w1b
## rarefying sample F3w2b
## rarefying sample F3w3b
## rarefying sample F4s1b
## rarefying sample F4s2b
## rarefying sample F4s3b
## rarefying sample F4w1b
## rarefying sample F4w2b
## rarefying sample F4w3b
## rarefying sample Fi.10.3.GM
## rarefying sample Fi.10.4.GB
## rarefying sample Fi.10.5.GB
## rarefying sample Fi.10.6.GB
## rarefying sample Fi.12.10.GB
## rarefying sample Fi.12.11.GB
## rarefying sample Fi.12.13.GB
## rarefying sample Fi.12.4.GB
## rarefying sample Fi.12.5.GB
## rarefying sample Fi.12.6.GB
## rarefying sample Fi.12.7.GB
## rarefying sample Fi.12.8.GB
## rarefying sample Fi.12.9.GB
## rarefying sample Fi.13.1.GM
## rarefying sample Fi.13.2.GB
## rarefying sample Fi.13.3.GB
## rarefying sample Fi.14.2.GM
## rarefying sample Fi.14.5.GB
## rarefying sample Fi.14.6.GB
## rarefying sample Fi.18.1.GB
## rarefying sample Fi.18.2.GB
## rarefying sample Fi.18.3.GB
## rarefying sample Fi.19.1.GB
## rarefying sample Fi.20.1.GB
## rarefying sample Fi.20.2.GB
## rarefying sample Fi.3.6.GB
## rarefying sample Fi.3.7.GB
## rarefying sample Fi.4.4.GB
## rarefying sample Fi.5.3.GB
## rarefying sample Fi.5.4.GB
## rarefying sample Fi.5.5.GB
## rarefying sample Fi.6.4.GB
## rarefying sample Fi.6.5.GB
## rarefying sample Fi.6.6.GM
## rarefying sample Fi.6.7.GB
## rarefying sample Fi.6.8.GB
## rarefying sample Fi.6.9.GB
## rarefying sample Fi10.1gb
## rarefying sample Fi10.2gb
## rarefying sample Fi11.1gb
## rarefying sample Fi11.2gb
## rarefying sample Fi12.1gb
## rarefying sample Fi12.2gb
## rarefying sample Fi12.3gb
## rarefying sample Fi3.1gb
## rarefying sample Fi3.2gb
## rarefying sample Fi3.3gb
## rarefying sample Fi3.4gb
## rarefying sample Fi3.5gb
## rarefying sample Fi5.1gb
## rarefying sample Fi5.2gb
## rarefying sample Fi6.1gb
## rarefying sample Fi6.2gb
## rarefying sample Fi6.3gb
## rarefying sample Fi7.1gb
## rarefying sample Fi7.2gb
## rarefying sample Fi7.3gb
## rarefying sample Fi7.4gb
## rarefying sample Fi7.5gb
## rarefying sample Fo.17.5.BM
## rarefying sample Fo.4.1.BM
## rarefying sample Fo.4.2.BM
## rarefying sample Fo.4.3.BM
## rarefying sample Fo.4.4.BM
## rarefying sample Fo.4.5.BM
## rarefying sample HM.S.1.M
## rarefying sample HM.S.2.M
## rarefying sample HM.S.3.M
## rarefying sample HM.S.4.B
## rarefying sample HM.S.5.B
## rarefying sample HM.S.6.B
## rarefying sample HM.W.1.M
## rarefying sample HM.W.2.M
## rarefying sample HM.W.3.M
## rarefying sample HM.W.4.B
## rarefying sample HM.W.5.B
## rarefying sample HM.W.6.B
## rarefying sample Hu.18.GM
## rarefying sample Hu.18.SM
## rarefying sample Hu.19.GM
## rarefying sample Hu.19.SM
## rarefying sample Hu.20.GM
## rarefying sample Hu.20.SM
## rarefying sample Hu.21.GM
## rarefying sample Hu.21.SM
## rarefying sample Hu.22.GM
## rarefying sample Hu.22.SM
## rarefying sample Hu.23.GM
## rarefying sample Hu.23.SM
## rarefying sample Hu.25.GM
## rarefying sample Hu.25.SM
## rarefying sample Hu.26.GM
## rarefying sample Hu.26.SM
## rarefying sample Hu.27.GM
## rarefying sample Hu.27.SM
## rarefying sample Hu.28.GM
## rarefying sample Hu.28.SM
## rarefying sample Hu.29.GM
## rarefying sample Hu.29.SM
## rarefying sample Hu.30.GM
## rarefying sample Hu.30.SM
## rarefying sample Hu.31.GM
## rarefying sample Hu.31.SM
## rarefying sample Hu.32.GM
## rarefying sample Hu.33.GM
## rarefying sample Hu.34.GM
## rarefying sample Hu.35.GM
## rarefying sample Hu.36.GM
## rarefying sample Hu.36.SM
## rarefying sample Hu.37.GM
## rarefying sample Hu.37.SM
## rarefying sample Hu.38.GM
## rarefying sample Hu.38.SM
## rarefying sample Hu.39.GM
## rarefying sample Hu.39.SM
## rarefying sample Hu.41.GM
## rarefying sample Hu.41.SM
## rarefying sample Hu.43.GM
## rarefying sample Hu.43.SM
## rarefying sample Hu.44.GM
## rarefying sample Hu.44.SM
## rarefying sample Hu.45.GB
## rarefying sample Hu.45.SB
## rarefying sample Hu.46.GB
## rarefying sample Hu.46.SB
## rarefying sample Hu.47.GB
## rarefying sample Hu.47.SB
## rarefying sample Hu.48.GB
## rarefying sample Hu.48.SB
## rarefying sample Hu.49.GB
## rarefying sample Hu.49.SB
## rarefying sample Hu.50.GB
## rarefying sample Hu.50.SB
## rarefying sample Hu.51.GB
## rarefying sample Hu.51.SB
## rarefying sample Hu.52.GB
## rarefying sample Hu.52.SB
## rarefying sample Hu.53.GB
## rarefying sample Hu.53.SB
## rarefying sample Hu.54.GB
## rarefying sample Hu.54.SB
## rarefying sample Hu.55.GB
## rarefying sample Hu.55.SB
## rarefying sample Hu.56.GB
## rarefying sample Hu.56.SB
## rarefying sample Hu.57.GB
## rarefying sample Hu.57.SB
## rarefying sample Hu.58.GB
## rarefying sample Hu.58.SB
## rarefying sample Hu.59.GB
## rarefying sample Hu.59.SB
## rarefying sample Hu.60.GB
## rarefying sample Hu.60.SB
## rarefying sample Hu.61.GB
## rarefying sample Hu.61.SB
## rarefying sample Hu.63.GB
## rarefying sample Hu.63.SB
## rarefying sample Hu.64.GB
## rarefying sample Hu.64.SB
## rarefying sample Hu.66.GB
## rarefying sample Hu.66.SB
## rarefying sample Hu.67.GB
## rarefying sample Hu.67.SB
## rarefying sample Hu.68.GB
## rarefying sample Hu.68.SB
## rarefying sample Hu.69.GB
## rarefying sample Hu.69.SB
## rarefying sample Hu.70.GB
## rarefying sample Hu.70.SB
## rarefying sample Hu.71.GB
## rarefying sample Hu.71.SB
## rarefying sample Hu10.1.biof.6
## rarefying sample Hu10.1gb
## rarefying sample Hu10.1sb
## rarefying sample Hu10.2.biof.6
## rarefying sample Hu10.3.biof.6
## rarefying sample Hu11.1.biof.7
## rarefying sample Hu11.1gb
## rarefying sample Hu11.1sb
## rarefying sample Hu11.2.biof.7
## rarefying sample Hu11.3.biof.7
## rarefying sample Hu12.1.biof.8
## rarefying sample Hu12.1gb
## rarefying sample Hu12.1sb
## rarefying sample Hu12.2.biof.8
## rarefying sample Hu12.3.biof.8
## rarefying sample Hu13.1.biof.9
## rarefying sample Hu13.1gb
## rarefying sample Hu13.1sb
## rarefying sample Hu13.2.biof.9
## rarefying sample Hu13.3.biof.9
## rarefying sample Hu14.1gb
## rarefying sample Hu14.1sb
## rarefying sample Hu15.1.biof.11
## rarefying sample Hu15.2.biof.11
## rarefying sample Hu15.3.biof.11
## rarefying sample Hu15.3gb
## rarefying sample Hu15.3sb
## rarefying sample Hu16.1.biof.12
## rarefying sample Hu16.2.biof.12
## rarefying sample Hu16.3.biof.12
## rarefying sample Hu16.3gb
## rarefying sample Hu16.3sb
## rarefying sample Hu17.3gb
## rarefying sample Hu17.3sb
## rarefying sample Hu3.1gb
## rarefying sample Hu3.1sb
## rarefying sample Hu4.1gb
## rarefying sample Hu4.1sb
## rarefying sample Hu5.1.biof.3
## rarefying sample Hu5.1gb
## rarefying sample Hu5.1sb
## rarefying sample Hu5.2.biof.3
## rarefying sample Hu5.3.biof.3
## rarefying sample Hu6.1gb
## rarefying sample Hu6.1sb
## rarefying sample Hu7.1.biof.4
## rarefying sample Hu7.1gb
## rarefying sample Hu7.1sb
## rarefying sample Hu7.2.biof.4
## rarefying sample Hu7.3.biof.4
## rarefying sample Hu8.1.biof.5
## rarefying sample Hu8.1gb
## rarefying sample Hu8.1sb
## rarefying sample Hu8.2.biof.5
## rarefying sample Hu8.3.biof.5
## rarefying sample Hu9.1gb
## rarefying sample Hu9.1sb
## rarefying sample Lo.2.4.GB
## rarefying sample Lo.2.5.GB
## rarefying sample Lo.2.7.GB
## rarefying sample Lo.2.8.GB
## rarefying sample Lo.2.9.GB
## rarefying sample Lo.5.1.GM
## rarefying sample Lo.5.4.GB
## rarefying sample Lo.5.5.GB
## rarefying sample Lo.9.2.GB
## rarefying sample Lo2.1gb
## rarefying sample Lo2.2gb
## rarefying sample Lo2.3gb
## rarefying sample Lo4.1gb
## rarefying sample Lo4.2gb
## rarefying sample Lo8.1gb
## rarefying sample Lo8.2gb
## rarefying sample Lo8.3gb
## rarefying sample Lo8.4gb
## rarefying sample Lo8.5gb
## rarefying sample NP.S.1.M
## rarefying sample NP.S.2.M
## rarefying sample NP.S.3.M
## rarefying sample NP.S.4.B
## rarefying sample NP.S.5.B
## rarefying sample NP.S.6.B
## rarefying sample NP.W.1.M
## rarefying sample NP.W.2.M
## rarefying sample NP.W.3.M
## rarefying sample NP.W.4.B
## rarefying sample NP.W.5.B
## rarefying sample NP.W.6.B
## rarefying sample Oy.13.7.GB
## rarefying sample Oy.15.4.GM
## rarefying sample Oy.15.5.GM
## rarefying sample Oy.15.6.GB
## rarefying sample Oy.15.8.GB
## rarefying sample R1s1b
## rarefying sample R1s2b
## rarefying sample R1s3b
## rarefying sample R1w1b
## rarefying sample R1w2b
## rarefying sample R1w3b
## rarefying sample R2s1b
## rarefying sample R2s2b
## rarefying sample R2s3b
## rarefying sample R2w1b
## rarefying sample R2w2b
## rarefying sample R2w3b
## rarefying sample R3s1b
## rarefying sample R3s2b
## rarefying sample R3s3b
## rarefying sample R3w1b
## rarefying sample R3w2b
## rarefying sample R3w3b
## rarefying sample R4s1b
## rarefying sample R4s2b
## rarefying sample R4s3b
## rarefying sample R4w1b
## rarefying sample R4w2b
## rarefying sample R4w3b
## rarefying sample Rivs1b
## rarefying sample Rivs2b
## rarefying sample Rivs3b
## rarefying sample Rivw1b
## rarefying sample Rivw2b
## rarefying sample Rivw3b
## rarefying sample Sq.11.1.GM
## rarefying sample Sq.11.2.GM
## rarefying sample Sq.11.4.GB
## rarefying sample Sq.11.5.GB
## rarefying sample U1s1b
## rarefying sample U1s2b
## rarefying sample U1s3b
## rarefying sample U1w1b
## rarefying sample U1w2b
## rarefying sample U1w3b
## rarefying sample U2s1b
## rarefying sample U2s2b
## rarefying sample U2s3b
## rarefying sample U2w1b
## rarefying sample U2w2b
## rarefying sample U2w3b
## rarefying sample U3s1b
## rarefying sample U3s2b
## rarefying sample U3s3b
## rarefying sample U3w1b
## rarefying sample U3w2b
## rarefying sample U3w3b
## rarefying sample U4s1b
## rarefying sample U4s2b
## rarefying sample U4s3b
## rarefying sample U4w1b
## rarefying sample U4w2b
## rarefying sample U4w3b
#check Zhang & Huang's coverage estimator
library(entropart)
Coverage_estimator_rarefied <- Coverage(ps_Nem_rarefy@otu_table, Estimator = "Best", Level = NULL, CheckArguments = TRUE)
Coverage_estimator_rarefied
## ZhangHuang
## 0.9792839
#Remove unnecessary samples
#ps_Nem_rarefy <- subset_samples(ps_Nem_rarefy, !(sample_names(ps_Nem_rarefy) %in% c("R1w2b", "JCA.U2S2B", "U1w2b")))
#Check sample size
dim(ps_Nem_rarefy@otu_table)
## [1] 379 70796
PathoHuman.txt
(of Nem data)PathoHuman_Nem_TAX1 <- read.table ("/Users/qcvp/R/outputs/dada2_total/asv_table/PathoHuman.txt", sep="\t", row.names=1)
head(PathoHuman_Nem_TAX1, 1)
## V2 V3 V4 V5
## ASV1 MG592701.1.1238 100 370 0
## V6
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
## V7 V8 V9 V10
## ASV1 1 370 442 811
## V11
## ASV1 MG592701.1.1238 Bacteria;Bacteroidota;Bacteroidia;Bacteroidales;Prevotellaceae;Prevotella_9;Prevotella copri
## V12
## ASV1 100
Add names to each column of the table
colnames(PathoHuman_Nem_TAX1) <- c("sseqid", "pident", "length","mismatch", "qseq",
"qstart", "qend", "sstart","send","stitle", "qcovs")
head(PathoHuman_Nem_TAX1, 1)
## sseqid pident length mismatch
## ASV1 MG592701.1.1238 100 370 0
## qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
## qstart qend sstart send
## ASV1 1 370 442 811
## stitle
## ASV1 MG592701.1.1238 Bacteria;Bacteroidota;Bacteroidia;Bacteroidales;Prevotellaceae;Prevotella_9;Prevotella copri
## qcovs
## ASV1 100
Separating multiple information column to our desire columns
PathoHuman_Nem_TAX2 <- separate(data = PathoHuman_Nem_TAX1, col = stitle,
into = c("ACC_Kingdom", "Phylum",
"Class", "Order", "Family", "Genus_Species"), sep = ";", extra = "merge")
head(PathoHuman_Nem_TAX2, 1)
## sseqid pident length mismatch
## ASV1 MG592701.1.1238 100 370 0
## qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
## qstart qend sstart send ACC_Kingdom Phylum Class
## ASV1 1 370 442 811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
## Order Family Genus_Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella_9;Prevotella copri 100
PathoHuman_Nem_TAX3 <- separate(data = PathoHuman_Nem_TAX2, col = Genus_Species, into = c("Genus", "Species"), sep = ";", extra = "merge")
head(PathoHuman_Nem_TAX3, 1)
## sseqid pident length mismatch
## ASV1 MG592701.1.1238 100 370 0
## qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
## qstart qend sstart send ACC_Kingdom Phylum Class
## ASV1 1 370 442 811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
## Order Family Genus Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella_9 Prevotella copri 100
PathoHuman_Nem_TAX4 <- separate(data = PathoHuman_Nem_TAX3, col = ACC_Kingdom, into = c("ACC", "Kingdom"), sep = "\\s")
head(PathoHuman_Nem_TAX4, 1)
## sseqid pident length mismatch
## ASV1 MG592701.1.1238 100 370 0
## qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
## qstart qend sstart send ACC Kingdom Phylum Class
## ASV1 1 370 442 811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
## Order Family Genus Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella_9 Prevotella copri 100
PathoHuman_Nem_TAX5 <- separate(data = PathoHuman_Nem_TAX4, col = Species, into = c("Genus", "Species"), sep = " ", extra = "merge" )
head(PathoHuman_Nem_TAX5, 1)
## sseqid pident length mismatch
## ASV1 MG592701.1.1238 100 370 0
## qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
## qstart qend sstart send ACC Kingdom Phylum Class
## ASV1 1 370 442 811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
## Order Family Genus Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella copri 100
PathoHuman_Nem_TAX6 <- separate(data = PathoHuman_Nem_TAX5, col = Species, into = c("Species", "Suplement_info"), sep =" ", extra = "merge")
head(PathoHuman_Nem_TAX6, 1)
## sseqid pident length mismatch
## ASV1 MG592701.1.1238 100 370 0
## qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
## qstart qend sstart send ACC Kingdom Phylum Class
## ASV1 1 370 442 811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
## Order Family Genus Species Suplement_info qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella copri <NA> 100
Filter the bacterial pathogens with coverage score = 100, length >= 239bp
PathoHuman_Nem_TAX_filtered <- PathoHuman_Nem_TAX6[PathoHuman_Nem_TAX6$qcovs == 100
& PathoHuman_Nem_TAX6$length > 239
& PathoHuman_Nem_TAX6$Kingdom == "Bacteria",]
head(PathoHuman_Nem_TAX_filtered, 1)
## sseqid pident length mismatch
## ASV1 MG592701.1.1238 100 370 0
## qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
## qstart qend sstart send ACC Kingdom Phylum Class
## ASV1 1 370 442 811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
## Order Family Genus Species Suplement_info qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella copri <NA> 100
Creating a table with ASV IDs and taxonomy of human pathogens
Patho_Hu_vec <- row.names(PathoHuman_Nem_TAX_filtered)
#Only select taxo cols
Patho_TAX_Hu <- as.matrix(PathoHuman_Nem_TAX_filtered[,c(11:17)])
Patho_TAX_Hu = tax_table(Patho_TAX_Hu)
#Adding ASV sorted by ACC col to new dataframe
row.names(Patho_TAX_Hu) <- Patho_Hu_vec
head(Patho_TAX_Hu, 5)
## Taxonomy Table: [5 taxa by 7 taxonomic ranks]:
## Kingdom Phylum Class Order
## ASV1 "Bacteria" "Bacteroidota" "Bacteroidia" "Bacteroidales"
## ASV7 "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Enterobacterales"
## ASV8 "Bacteria" "Firmicutes" "Bacilli" "Lactobacillales"
## ASV10 "Bacteria" "Firmicutes" "Bacilli" "Staphylococcales"
## ASV13 "Bacteria" "Firmicutes" "Clostridia" "Oscillospirales"
## Family Genus Species
## ASV1 "Prevotellaceae" "Prevotella" "copri"
## ASV7 "Vibrionaceae" "Photobacterium" "damselae"
## ASV8 "Listeriaceae" "Listeria" "welshimeri"
## ASV10 "Staphylococcaceae" "Staphylococcus" "warneri"
## ASV13 "Ruminococcaceae" "Faecalibacterium" "prausnitzii"
Extracting samples
# Extract samples names
longitudinal_sample <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/metadata_longitudinal.txt", header = TRUE, row.names = 1, sep="\t")
rownames(longitudinal_sample) <- gsub("-",".",rownames(longitudinal_sample))
longitudinal_sample_names <- rownames(longitudinal_sample)
# Absolute abundance
physeq_Nem_long <- subset_samples(physeq_Nem_5, sample_names(physeq_Nem_5) %in% longitudinal_sample_names)
physeq_Nem_long
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 109183 taxa and 48 samples ]
## sample_data() Sample Data: [ 48 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 109183 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 109183 reference sequences ]
# Relative abundance
physeq_Nem_long_rel <- subset_samples(physeq_Nem_rel, sample_names(physeq_Nem_rel) %in% longitudinal_sample_names)
physeq_Nem_long_rel
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 109183 taxa and 48 samples ]
## sample_data() Sample Data: [ 48 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 109183 taxa by 6 taxonomic ranks ]
## refseq() DNAStringSet: [ 109183 reference sequences ]
Raw dataset
Nem_raw_ASV <- data.frame(physeq_Nem_long@otu_table@.Data)
Nem_raw_hu_patho_ASV <- Nem_raw_ASV[,colnames(Nem_raw_ASV) %in% Patho_Hu_vec]
# Create new physeq object
Nem_hu_unrarefied <- phyloseq(otu_table(Nem_raw_hu_patho_ASV, taxa_are_rows = FALSE),
sample_data(context),
tax_table(as.matrix(Patho_TAX_Hu)))
Nem_hu_unrarefied
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 600 taxa and 48 samples ]
## sample_data() Sample Data: [ 48 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 600 taxa by 7 taxonomic ranks ]
Relative dataset
Nem_raw_unrarefied_ASV_rel <- data.frame(physeq_Nem_long_rel@otu_table@.Data)
Nem_raw_unrarefied_hu_patho_ASV_rel <- Nem_raw_unrarefied_ASV_rel
#Create new physeq object
Nem_hu_unrarefied_rel <- phyloseq(otu_table(Nem_raw_unrarefied_hu_patho_ASV_rel, taxa_are_rows = FALSE),
sample_data(context),
tax_table(as.matrix(Patho_TAX_Hu)))
Nem_hu_unrarefied_rel
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 600 taxa and 48 samples ]
## sample_data() Sample Data: [ 48 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 600 taxa by 7 taxonomic ranks ]
Prepare data for bacterial composition analysis
# Melting absolute abundance
Melted_Nem_hu_unrarefied_species <- Nem_hu_unrarefied %>%
tax_glom(taxrank = "Species") %>% # agglomerate at rank level
psmelt() %>% # Melt to long format
filter(Abundance > 0) %>% # Filter out absent taxa reads
arrange(Species) # Sort data frame alphabetically by family
# Melting Relative Abundance
Melted_Nem_hu_unrarefied_species_rel <- Nem_hu_unrarefied_rel %>%
tax_glom(taxrank = "Species") %>% # agglomerate at rank level
psmelt() %>% # Melt to long format
filter(Abundance > 0) %>% # Filter out absent taxa reads
arrange(Species)
# Adding pathogen column for pie chart
#physeq_whole_Nem_species$Pathogen_type <- ifelse(physeq_whole_Nem_species$OTU %in% Patho_Hu_vec, physeq_whole_Nem_species$Biosample, "Other")
#physeq_whole_Nem_species$Pathogen <- ifelse(physeq_whole_Nem_species$OTU %in% Patho_Hu_vec, physeq_whole_Nem_species$Class, "Other")
Extrat samples occur in each part of Venn diagram
# Sample occur in each Biosample
animal_sample <- subset(Melted_Nem_hu_unrarefied_species, Biosample == "Animals")
Envi_sample <- subset(Melted_Nem_hu_unrarefied_species, Biosample == "Environment")
Human_sample <- subset(Melted_Nem_hu_unrarefied_species, Biosample == "Human")
#Extract list of ASV for each components
list_ASV_all <- Reduce(intersect, list(animal_sample$OTU, Envi_sample$OTU, Human_sample$OTU)) #ASV occur in all biosample types
list_ASV_all
## [1] "ASV1" "ASV145" "ASV7" "ASV383" "ASV182" "ASV121" "ASV263" "ASV107"
## [9] "ASV279" "ASV261" "ASV260" "ASV115" "ASV112" "ASV40" "ASV13" "ASV48"
## [17] "ASV74" "ASV32" "ASV391" "ASV108" "ASV18" "ASV106" "ASV10" "ASV110"
## [25] "ASV663"
list_ASV_EA <- setdiff(intersect(Envi_sample$OTU,animal_sample$OTU), list_ASV_all) #ASV occur in both Environment and animal but not in Human
list_ASV_EA
## character(0)
list_ASV_EH <- setdiff(intersect(Envi_sample$OTU,Human_sample$OTU), list_ASV_all) #ASV occur in both Environment and human but not in animal
list_ASV_EH
## [1] "ASV28" "ASV172" "ASV245" "ASV309" "ASV1192" "ASV2582" "ASV185"
## [8] "ASV811" "ASV348" "ASV38" "ASV2330" "ASV720" "ASV1706" "ASV2156"
## [15] "ASV817" "ASV844" "ASV9796" "ASV1271"
list_ASV_AH <- setdiff(intersect(animal_sample$OTU,Human_sample$OTU), list_ASV_all) #ASV occur in both human and animal but not in environment
list_ASV_AH
## [1] "ASV1276" "ASV72" "ASV57955" "ASV431" "ASV496" "ASV191"
## [7] "ASV7760" "ASV1190" "ASV950" "ASV223" "ASV522" "ASV1842"
## [13] "ASV737" "ASV70" "ASV9460"
list_ASV_A <- Reduce(setdiff, list(animal_sample$OTU, list_ASV_EA, list_ASV_AH, list_ASV_all)) # ASV occur in only Animal
list_ASV_A
## [1] "ASV73" "ASV4092" "ASV24639" "ASV2427" "ASV6340" "ASV738"
## [7] "ASV547" "ASV943" "ASV6423" "ASV9725" "ASV679" "ASV1533"
## [13] "ASV23547" "ASV613" "ASV53236" "ASV640"
list_ASV_H <- Reduce(setdiff, list(Human_sample$OTU, list_ASV_EH, list_ASV_AH, list_ASV_all)) # ASV occur in only Human
list_ASV_H
## [1] "ASV3220" "ASV316" "ASV1088" "ASV4028" "ASV1334" "ASV7426"
## [7] "ASV6858" "ASV5245" "ASV426" "ASV7427" "ASV6337" "ASV2934"
## [13] "ASV4500" "ASV6368" "ASV1740" "ASV167" "ASV783" "ASV6880"
## [19] "ASV1991" "ASV5244" "ASV1349" "ASV2167" "ASV1539" "ASV398"
## [25] "ASV16681" "ASV2815" "ASV3654" "ASV265" "ASV2983" "ASV2070"
## [31] "ASV612" "ASV10033" "ASV826" "ASV1640" "ASV9787" "ASV2524"
## [37] "ASV2790" "ASV14047" "ASV4272" "ASV3456" "ASV7610" "ASV2092"
## [43] "ASV1237" "ASV1391" "ASV53743" "ASV2996" "ASV23530" "ASV6711"
## [49] "ASV718" "ASV5981" "ASV2893" "ASV32069" "ASV6765" "ASV6175"
## [55] "ASV1582" "ASV1050" "ASV22648" "ASV411" "ASV1177" "ASV15653"
## [61] "ASV13593" "ASV3237" "ASV2062" "ASV8852" "ASV342"
list_ASV_E <- Reduce(setdiff, list(Envi_sample$OTU, list_ASV_EH, list_ASV_EA, list_ASV_all)) # ASV occur in only Environment
list_ASV_E
## [1] "ASV5193" "ASV8778" "ASV5532" "ASV2891" "ASV123" "ASV1435"
## [7] "ASV16429" "ASV2649" "ASV5502" "ASV11514" "ASV34362" "ASV2204"
## [13] "ASV14165"
# Checking
length(unique(Melted_Nem_hu_unrarefied_species$OTU))
## [1] 152
length(list_ASV_E) + length(list_ASV_H) + length(list_ASV_A) + length(list_ASV_all) + length(list_ASV_EA) + length(list_ASV_EH) + length(list_ASV_AH)
## [1] 152
Relative abundance in all biosample
# Extract samples reads and add metadata
longitudinan_sample_rel <- as.data.frame(rowSums(Nem_hu_unrarefied_rel@otu_table@.Data))
longitudinan_sample_rel <- cbind(longitudinan_sample_rel, physeq_Nem_long@sam_data)
custom_colors <- c( "#CBD588", "#5F7FC7","#DA5724", "#508578", "#CD9BCD",
"#AD6F3B", "#673770","#D14285", "#652926", "#C84248",
"#8569D5", "#5E738F","#D1A33D", "#8A7C64", "#599861", "black", "red", "green")
a <- ggplot(longitudinan_sample_rel, aes(x = Biosample, y = rowSums(Nem_hu_unrarefied_rel@otu_table@.Data), color = Biosample)) +
geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3, aes(colour = Biosample), show.legend = TRUE) + theme_bw() +
geom_boxplot(aes(colour = Biosample), alpha=0.1, outlier.colour = NA) +
theme(axis.title.x = element_blank(),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 13),
strip.text = element_text(size = 18),
axis.title.y = element_text(size = 20),
axis.text.y = element_text(size=20),
axis.text.x = element_blank()) +
ylab("Relative Abundance") +
facet_wrap(~Date, nrow=1, scales = "free_x") + scale_fill_manual(values = custom_colors)
a
Calculate sample reads
# prepare data for bacterial composition
all_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_all)
EA_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_EA)
EH_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_EH)
AH_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_AH)
A_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_A)
E_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_E)
H_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_H)
# calculate read
colSums(as.data.frame(all_sample[,3]))
## all_sample[, 3]
## 510804
colSums(as.data.frame(E_sample[,3]))
## E_sample[, 3]
## 165
colSums(as.data.frame(EA_sample[,3]))
## EA_sample[, 3]
## 0
colSums(as.data.frame(EH_sample[,3]))
## EH_sample[, 3]
## 31450
colSums(as.data.frame(AH_sample[,3]))
## AH_sample[, 3]
## 27393
colSums(as.data.frame(H_sample[,3]))
## H_sample[, 3]
## 12466
colSums(as.data.frame(A_sample[,3]))
## A_sample[, 3]
## 2474
Compositional distribution of pathogenic ASV (venn diagram)
# Number of ASV
x <- list(A = animal_sample$OTU, B = Envi_sample$OTU, C = Human_sample$OTU)
c1 <- ggVennDiagram(x,
category.names = c("Animal","Environment","Human"),
label = "count",) +
theme(legend.position = "none") +
scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
c1
Bacterial composition of each biosample(animal, human, environment) compositional bar chart
# Composition for whole community
c1 <- ggplot(Melted_Nem_hu_unrarefied_species_rel, aes(x = Biosample, fill = Class)) +
geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
ylab("Whole bacterial composition") +
scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
theme_bw() +
theme(legend.text = element_text(face = "italic")) +
theme(axis.title.x = element_blank(),
axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
axis.ticks.x = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank()) #remove minor-grid labels
c1
#Composition for ASV in Enviroment and Animal
#Composition for ASV in Enviroment and Human
c2 <- ggplot(EH_sample, aes(x = "", fill = Genus)) +
geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
ylab("Whole bacterial composition") +
scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
theme_bw() +
theme(legend.text = element_text(face = "italic")) +
xlab("ASV in Environment and Human only")+
theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
axis.ticks.x = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank()) #remove minor-grid labels
c2
#Composition for ASV in Human and Animal
c3 <- ggplot(AH_sample, aes(x = "", fill = Genus)) +
geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
ylab("Whole bacterial composition") +
scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
theme_bw() +
xlab("ASV in Human and Animal only")+
theme(legend.text = element_text(face = "italic")) +
theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
axis.ticks.x = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank()) #remove minor-grid labels
c3
#Composition for ASV in Enviroment only
c4 <- ggplot(E_sample, aes(x = "", fill = Genus)) +
geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
ylab("Whole bacterial composition") +
scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
theme_bw() +
theme(legend.text = element_text(face = "italic")) +
xlab("ASV in Environment only")+
theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
axis.ticks.x = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank()) #remove minor-grid labels
c4
#Composition for ASV in Animal only
c5 <- ggplot(A_sample, aes(x = "", fill = Genus)) +
geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
ylab("Whole bacterial composition") +
scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
theme_bw() +
theme(legend.text = element_text(face = "italic")) +
xlab("ASV in Animal only")+
theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
axis.ticks.x = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank()) #remove minor-grid labels
c5
#Composition for ASV in Human only
c6 <- ggplot(H_sample, aes(x = "", fill = Genus)) +
geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
ylab("Whole bacterial composition") +
scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
theme_bw() +
theme(legend.text = element_text(face = "italic")) +
xlab("ASV in Human only")+
theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
axis.ticks.x = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank()) #remove minor-grid labels
c6
#Composition for ASV in all 3 biosamples
c7 <- ggplot(all_sample, aes(x = "", fill = Genus)) +
geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
xlab("ASV appear in all biosamples")+
ylab("Whole bacterial composition") +
scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
theme_bw() +
theme(legend.text = element_text(face = "italic")) +
theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
axis.ticks.x = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(), #remove major-grid labels
panel.grid.minor = element_blank()) #remove minor-grid labels
c7