Custom scripts

Before analyzing, run the following codes: ## OTU to matrix

otu.matrix <- function(ps, force.taxa.cols = TRUE) {
  mat <- as(phyloseq::otu_table(ps), "matrix")
  if (taxa_are_rows(ps) & force.taxa.cols) {
    mat <- t(mat)
  }
  return(mat)
}

Scripts of ggrare

ggrare <- function(physeq, step = 10, label = NULL, color = NULL,
                   plot = TRUE, parallel = FALSE, se = TRUE) {
  
  require("ggplot2")
  
  x <- as(phyloseq::otu_table(physeq), "matrix")
  if (phyloseq::taxa_are_rows(physeq)) x <- t(x)
  ## This script is adapted from vegan `rarecurve` function
  tot <- rowSums(x)
  S <- rowSums(x > 0)
  nr <- nrow(x)
  
  rarefun <- function(i) {
    cat(paste("rarefying sample", rownames(x)[i]), sep = "\n")
    n <- seq(1, tot[i], by = step)
    if (n[length(n)] != tot[i]) {
      n <- c(n, tot[i])
    }
    y <- vegan::rarefy(x[i, ,drop = FALSE], n, se = se)
    if (nrow(y) != 1) {
      rownames(y) <- c(".S", ".se")
      return(data.frame(t(y), Size = n, Sample = rownames(x)[i]))
    } else {
      return(data.frame(.S = y[1, ], Size = n, Sample = rownames(x)[i]))
    }
  }
  
  if (parallel) {
    out <- parallel::mclapply(seq_len(nr), rarefun, mc.preschedule = FALSE)
  } else {
    out <- lapply(seq_len(nr), rarefun)
  }
  
  df <- do.call(rbind, out)
  
  ## Get sample data 
  if (!is.null(phyloseq::sample_data(physeq, FALSE))) {
    sdf <- as(phyloseq::sample_data(physeq), "data.frame")
    sdf$Sample <- rownames(sdf)
    data <- merge(df, sdf, by = "Sample")
    labels <- data.frame(x = tot, y = S, Sample = rownames(x))
    labels <- merge(labels, sdf, by = "Sample")
  }
  
  ## Add, any custom-supplied plot-mapped variables
  if (length(color) > 1) {
    data$color <- color
    names(data)[names(data) == "color"] <- deparse(substitute(color))
    color <- deparse(substitute(color))
  }
  
  if (length(label) > 1) {
    labels$label <- label
    names(labels)[names(labels) == "label"] <- deparse(substitute(label))
    label <- deparse(substitute(label))
  }
  
  p <- ggplot(data = data,
              aes_string(x = "Size", y = ".S",
                         group = "Sample", color = color)) +
    labs(x = "Sample Size", y = "Species Richness")
  
  if (!is.null(label)) {
    p <- p + geom_text(data = labels,
                       aes_string(x = "x", y = "y",
                                  label = label, color = color),
                       size = 4, hjust = 0)
  }
  
  p <- p + geom_line()
  
  if (se) { ## add standard error if available
    p <- p +
      geom_ribbon(aes_string(ymin = ".S - .se", ymax = ".S + .se",
                             color = NULL, fill = color),
                  alpha = 0.2)
  }
  
  if (plot) {
    plot(p)
  }
  
  invisible(p)
  
}

#' Check for normal distribution of alpha diversity measurements
#'
#'
#' @return Print ggplot rarefaction curves
#' @param rich A dataframe with alpha diversity indices as columns
#' and samples as rows as the output of pyloseq::estimate_richness()
#' @param file Output file name

indices_normality <- function(rich, nrow, ncol) {
  
  ### p-value < 0.05 means data failed normality test
  
  par(mfrow = c(nrow, ncol))
  
  for (i in names(rich)) {
    shap <- shapiro.test(rich[, i])
    qqnorm(rich[, i], main = i, sub = shap$p.value)
    qqline(rich[, i])
  }
  
  par(mfrow = c(1, 1))
}

Library loading

library("seqinr")
library(Biostrings)
library(dplyr)
library(phyloseq)
library(writexl)
library(tidyr)
library(ggplot2)
library(readr)
library(ggpubr)
library(reshape)
library(microbiomeMarker)
library(ANCOMBC)
library(tidyverse)
library(DT)
library(ggVennDiagram)
library(VennDiagram)    

Load context

Importing ASV table of Nem

asv_table <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/asv_table.tsv", sep="\t", row.names=1, header=T)

Invert the ASV table in case sample IDs are on row not column

asv_inverted<-as.data.frame(t(asv_table))

Load the ASV sequences (previously created with DADA2)

asv_seq <- readDNAStringSet(file = "/Users/qcvp/R/outputs/dada2_total/asv_table/asv.fasta", 
                            format = "fasta",
                            nrec = -1L, 
                            skip = 0L, 
                            seek.first.rec = FALSE, 
                            use.names = TRUE)
head(asv_seq, 1)
## DNAStringSet object of length 1:
##     width seq                                               names               
## [1]   370 TACGGAAGGTCCGGGCGTTATCC...TGGGGAGTACGCCGGCAACGGTG ASV1

Load asv tax of Nem

asv_tax <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/taxonomy.tsv", header = TRUE, row.names = 1, sep="\t")
asv_tax <- as.matrix(asv_tax)

head(asv_tax, 1)
##      Kingdom    Phylum         Class         Order           Family          
## ASV1 "Bacteria" "Bacteroidota" "Bacteroidia" "Bacteroidales" "Prevotellaceae"
##      Genus         
## ASV1 "Prevotella_9"

Import additional the metadata information of Nem

context <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/metadata.txt", header = TRUE, row.names = 1, sep="\t")
rownames(context) <- gsub("-",".",rownames(context))
head(context)
##          Sequencing     Date Transect Station   Biosample Biosample_type
## BD.S.1.M Metagenome Jul_2023 Bich_Dam      BD Environment       Sediment
## BD.S.2.M Metagenome Jul_2023 Bich_Dam      BD Environment       Sediment
## BD.S.3.M Metagenome Jul_2023 Bich_Dam      BD Environment       Sediment
## BD.S.4.B  Barcoding Dec_2023 Bich_Dam      BD Environment       Sediment
## BD.S.5.B  Barcoding Dec_2023 Bich_Dam      BD Environment       Sediment
## BD.S.6.B  Barcoding Dec_2023 Bich_Dam      BD Environment       Sediment
##          Biosample_cluster   Host_type Host_subtype Host_genus Host_species
## BD.S.1.M          Sediment Environment     Sediment       <NA>         <NA>
## BD.S.2.M          Sediment Environment     Sediment       <NA>         <NA>
## BD.S.3.M          Sediment Environment     Sediment       <NA>         <NA>
## BD.S.4.B          Sediment Environment     Sediment       <NA>         <NA>
## BD.S.5.B          Sediment Environment     Sediment       <NA>         <NA>
## BD.S.6.B          Sediment Environment     Sediment       <NA>         <NA>
##          Species_ID
## BD.S.1.M       <NA>
## BD.S.2.M       <NA>
## BD.S.3.M       <NA>
## BD.S.4.B       <NA>
## BD.S.5.B       <NA>
## BD.S.6.B       <NA>
#Checking samples IDs uniformity 
setdiff(x = row.names(asv_inverted), y = row.names(context)) 
## character(0)

Making raw phyloseq objects

physeq_Nem <- phyloseq(
  otu_table(asv_inverted, taxa_are_rows = F), #if you have taxa on rows = T
  sample_data(context),
  refseq(asv_seq),
  tax_table(asv_tax)
)
physeq_Nem
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 111454 taxa and 412 samples ]
## sample_data() Sample Data:       [ 412 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 111454 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 111454 reference sequences ]

Decontamination process

Remove MOCK and Control samples from the phyloseq object

physeq_Nem_1 <- subset_samples(physeq_Nem, !Biosample =="Mock")
physeq_Nem_1
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 111454 taxa and 412 samples ]
## sample_data() Sample Data:       [ 412 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 111454 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 111454 reference sequences ]
physeq_Nem_2 <- subset_samples(physeq_Nem_1, Biosample != "Control")
physeq_Nem_2
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 111454 taxa and 403 samples ]
## sample_data() Sample Data:       [ 403 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 111454 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 111454 reference sequences ]

Quick checking library size of each Biomsaple types

df <- as.data.frame(sample_data(physeq_Nem_2))  # Put sample_data into a ggplot-friendly data.frame
df$LibrarySize <- sample_sums(physeq_Nem_2)
df <- df[order(df$LibrarySize),]
df$Index <- seq(nrow(df))
ggplot(data=df, aes(x=Index, y=LibrarySize, color=as.factor(Biosample))) + 
  geom_point() + geom_text(aes(label=Index), hjust=2, nudge_y = 0.05)

Identification of contaminants - prevalence

Remove comtaminant ASVs

physeq_Nem_3 <- subset_taxa(physeq_Nem_2, !taxa_names(physeq_Nem) %in% c("ASV2", "ASV8", "ASV52", "ASV97", "ASV128", "ASV318", "ASV356", "ASV471", "ASV708", "ASV914", "ASV1101", "ASV1127", "ASV1915", "ASV1952", "ASV2892", "ASV3517", "ASV4859", "ASV9739", "ASV26448", "ASV30253"))
physeq_Nem_3
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 111434 taxa and 403 samples ]
## sample_data() Sample Data:       [ 403 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 111434 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 111434 reference sequences ]

Known kit Contaminant detection

contaminant_genera <- read.table("/Users/qcvp/R/Contaminant.txt", header = T)
a <- contaminant_genera[, ]
physeq_Nem_4 <- subset_taxa(physeq_Nem_3, !Genus %in% a)
physeq_Nem_4
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 109183 taxa and 403 samples ]
## sample_data() Sample Data:       [ 403 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 109183 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 109183 reference sequences ]

Removing samples with < 5000 reads

physeq_Nem_5 <- subset_samples(physeq_Nem_4, !sample_names(physeq_Nem_4) %in% c("Fi.4.3.GM", "Hu.35.SM","Sq.11.3.GB", "Fi.4.1.GB", "Hu.40.SM", "Oy.13.3.GM", "Fi.14.1.GM", "Hu.33.SM", "Hu.34.SM", "Hu.32.SM", "Fo.10.4.BM", "Fo.10.3.BM", "Fo.17.3.BM", "Fo.15.3.BM", "Fo.17.2.BM", "Oy.13.2.GB", "Fo.17.1.BM", "Fo.15.1.BM", "Fi.17.8.GM", "Fo.15.2.BM", "Oy.15.7.GB", "Lo.9.1.GB", "Lo.5.3.GB", "Lo.2.6.GB"))
physeq_Nem_5
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 109183 taxa and 379 samples ]
## sample_data() Sample Data:       [ 379 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 109183 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 109183 reference sequences ]

Checking data

max(rowSums(physeq_Nem_5@otu_table))
## [1] 79551
min(rowSums(physeq_Nem_5@otu_table))
## [1] 6723
mean(rowSums(physeq_Nem_5@otu_table))
## [1] 41402.22

Plotting bar plot of reads

# Extract row sum of physeq_Nemt_3
OTU_asmatrix_3 <- otu.matrix(physeq_Nem_3, force.taxa.cols = TRUE)
sample_reads_3 <- rowSums(OTU_asmatrix_3)

# Extract row sum of physeq_Nem_4
OTU_asmatrix_4 <- otu.matrix(physeq_Nem_4, force.taxa.cols = TRUE)
sample_reads_4 <- rowSums(OTU_asmatrix_4)

#Combine
sample_reads_total <- cbind(sample_reads_4, sample_reads_3)


# Load data
Abs_abun_arg <- sample_reads_total
# Reshape the data to long format
  Abs_abun_arg_long <- melt(Abs_abun_arg, id.vars = "Sample.name", variable.name = "Runs", value.name = "Reads")
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
  Biosample_type <- physeq_Nem_4@sam_data$Biosample_type
  Abs_abun_arg_long <- cbind( Abs_abun_arg_long, Biosample_type)

#Calculate errors
Abs_abun_summary <- Abs_abun_arg_long %>%
  group_by(Biosample_type, X2) %>%
  summarise(
    Mean = mean(value, na.rm = TRUE),
    SD = sd(value, na.rm = TRUE))
Abs_abun_summary
## # A tibble: 12 × 4
## # Groups:   Biosample_type [6]
##    Biosample_type X2               Mean     SD
##    <chr>          <fct>           <dbl>  <dbl>
##  1 Biofilm        sample_reads_4 44370. 15625.
##  2 Biofilm        sample_reads_3 48793. 12809.
##  3 Gut            sample_reads_4 40302. 17142.
##  4 Gut            sample_reads_3 41953. 17625.
##  5 Sediment       sample_reads_4 31652. 12182.
##  6 Sediment       sample_reads_3 31951. 12768.
##  7 Skin           sample_reads_4 41408. 17116.
##  8 Skin           sample_reads_3 46206. 17859.
##  9 Water          sample_reads_4 45188. 10212.
## 10 Water          sample_reads_3 45539. 10277.
## 11 Wholebody      sample_reads_4 19216. 19883.
## 12 Wholebody      sample_reads_3 20532. 20549.
# Plot using ggplot2
# Plot for sediment data
ggplot(Abs_abun_summary, aes(x = Biosample_type, y = Mean, fill = X2)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin =  Mean-SD, ymax = Mean+SD), width = 0.2, position = position_dodge(0.9)) +
  theme_minimal() +
  theme(legend.text = element_text(size = 15),
        legend.position = "top",
        plot.title = element_text(size = 20, face = "bold"),
        legend.title = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 15),
        strip.text = element_text(size = 15),
        axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
  labs(x = "biosample type", y = "number of read") +
  facet_grid( scales = "free")

Calculate relative abundance

#Tax 
whole_bacteria_ASV_TAX <- data.frame(physeq_Nem_5@tax_table)
ID <- rownames(whole_bacteria_ASV_TAX)
whole_bacteria_ASV_TAX <- cbind(ID=ID, whole_bacteria_ASV_TAX)

#Dataset with Relative abundance
physeq_Nem_rel <- transform_sample_counts(physeq_Nem_5, function(x) x / sum(x) )
rowSums(physeq_Nem_rel@otu_table@.Data)
##       BD.S.1.M       BD.S.2.M       BD.S.3.M       BD.S.4.B       BD.S.5.B 
##              1              1              1              1              1 
##       BD.S.6.B       BD.W.1.M       BD.W.2.M       BD.W.3.M       BD.W.4.B 
##              1              1              1              1              1 
##       BD.W.5.B       BD.W.6.B          Bds1b          Bds2b          Bds3b 
##              1              1              1              1              1 
##          Bdw1b          Bdw2b          Bdw3b   BioFilm.21.1   BioFilm.21.2 
##              1              1              1              1              1 
##   BioFilm.21.3       CR.S.1.M       CR.S.2.M       CR.S.3.M       CR.S.4.B 
##              1              1              1              1              1 
##       CR.S.5.B       CR.S.6.B       CR.W.1.M       CR.W.2.M       CR.W.3.M 
##              1              1              1              1              1 
##       CR.W.4.B       CR.W.5.B       CR.W.6.B          F1s1b          F1s2b 
##              1              1              1              1              1 
##          F1s3b          F1w1b          F1w2b          F1w3b          F2s1b 
##              1              1              1              1              1 
##          F2s2b          F2s3b          F2w1b          F2w2b          F2w3b 
##              1              1              1              1              1 
##          F3s1b          F3s2b          F3s3b          F3w1b          F3w2b 
##              1              1              1              1              1 
##          F3w3b          F4s1b          F4s2b          F4s3b          F4w1b 
##              1              1              1              1              1 
##          F4w2b          F4w3b     Fi.10.3.GM     Fi.10.4.GB     Fi.10.5.GB 
##              1              1              1              1              1 
##     Fi.10.6.GB    Fi.12.10.GB    Fi.12.11.GB    Fi.12.13.GB     Fi.12.4.GB 
##              1              1              1              1              1 
##     Fi.12.5.GB     Fi.12.6.GB     Fi.12.7.GB     Fi.12.8.GB     Fi.12.9.GB 
##              1              1              1              1              1 
##     Fi.13.1.GM     Fi.13.2.GB     Fi.13.3.GB     Fi.14.2.GM     Fi.14.5.GB 
##              1              1              1              1              1 
##     Fi.14.6.GB     Fi.18.1.GB     Fi.18.2.GB     Fi.18.3.GB     Fi.19.1.GB 
##              1              1              1              1              1 
##     Fi.20.1.GB     Fi.20.2.GB      Fi.3.6.GB      Fi.3.7.GB      Fi.4.4.GB 
##              1              1              1              1              1 
##      Fi.5.3.GB      Fi.5.4.GB      Fi.5.5.GB      Fi.6.4.GB      Fi.6.5.GB 
##              1              1              1              1              1 
##      Fi.6.6.GM      Fi.6.7.GB      Fi.6.8.GB      Fi.6.9.GB       Fi10.1gb 
##              1              1              1              1              1 
##       Fi10.2gb       Fi11.1gb       Fi11.2gb       Fi12.1gb       Fi12.2gb 
##              1              1              1              1              1 
##       Fi12.3gb        Fi3.1gb        Fi3.2gb        Fi3.3gb        Fi3.4gb 
##              1              1              1              1              1 
##        Fi3.5gb        Fi5.1gb        Fi5.2gb        Fi6.1gb        Fi6.2gb 
##              1              1              1              1              1 
##        Fi6.3gb        Fi7.1gb        Fi7.2gb        Fi7.3gb        Fi7.4gb 
##              1              1              1              1              1 
##        Fi7.5gb     Fo.17.5.BM      Fo.4.1.BM      Fo.4.2.BM      Fo.4.3.BM 
##              1              1              1              1              1 
##      Fo.4.4.BM      Fo.4.5.BM       HM.S.1.M       HM.S.2.M       HM.S.3.M 
##              1              1              1              1              1 
##       HM.S.4.B       HM.S.5.B       HM.S.6.B       HM.W.1.M       HM.W.2.M 
##              1              1              1              1              1 
##       HM.W.3.M       HM.W.4.B       HM.W.5.B       HM.W.6.B       Hu.18.GM 
##              1              1              1              1              1 
##       Hu.18.SM       Hu.19.GM       Hu.19.SM       Hu.20.GM       Hu.20.SM 
##              1              1              1              1              1 
##       Hu.21.GM       Hu.21.SM       Hu.22.GM       Hu.22.SM       Hu.23.GM 
##              1              1              1              1              1 
##       Hu.23.SM       Hu.25.GM       Hu.25.SM       Hu.26.GM       Hu.26.SM 
##              1              1              1              1              1 
##       Hu.27.GM       Hu.27.SM       Hu.28.GM       Hu.28.SM       Hu.29.GM 
##              1              1              1              1              1 
##       Hu.29.SM       Hu.30.GM       Hu.30.SM       Hu.31.GM       Hu.31.SM 
##              1              1              1              1              1 
##       Hu.32.GM       Hu.33.GM       Hu.34.GM       Hu.35.GM       Hu.36.GM 
##              1              1              1              1              1 
##       Hu.36.SM       Hu.37.GM       Hu.37.SM       Hu.38.GM       Hu.38.SM 
##              1              1              1              1              1 
##       Hu.39.GM       Hu.39.SM       Hu.41.GM       Hu.41.SM       Hu.43.GM 
##              1              1              1              1              1 
##       Hu.43.SM       Hu.44.GM       Hu.44.SM       Hu.45.GB       Hu.45.SB 
##              1              1              1              1              1 
##       Hu.46.GB       Hu.46.SB       Hu.47.GB       Hu.47.SB       Hu.48.GB 
##              1              1              1              1              1 
##       Hu.48.SB       Hu.49.GB       Hu.49.SB       Hu.50.GB       Hu.50.SB 
##              1              1              1              1              1 
##       Hu.51.GB       Hu.51.SB       Hu.52.GB       Hu.52.SB       Hu.53.GB 
##              1              1              1              1              1 
##       Hu.53.SB       Hu.54.GB       Hu.54.SB       Hu.55.GB       Hu.55.SB 
##              1              1              1              1              1 
##       Hu.56.GB       Hu.56.SB       Hu.57.GB       Hu.57.SB       Hu.58.GB 
##              1              1              1              1              1 
##       Hu.58.SB       Hu.59.GB       Hu.59.SB       Hu.60.GB       Hu.60.SB 
##              1              1              1              1              1 
##       Hu.61.GB       Hu.61.SB       Hu.63.GB       Hu.63.SB       Hu.64.GB 
##              1              1              1              1              1 
##       Hu.64.SB       Hu.66.GB       Hu.66.SB       Hu.67.GB       Hu.67.SB 
##              1              1              1              1              1 
##       Hu.68.GB       Hu.68.SB       Hu.69.GB       Hu.69.SB       Hu.70.GB 
##              1              1              1              1              1 
##       Hu.70.SB       Hu.71.GB       Hu.71.SB  Hu10.1.biof.6       Hu10.1gb 
##              1              1              1              1              1 
##       Hu10.1sb  Hu10.2.biof.6  Hu10.3.biof.6  Hu11.1.biof.7       Hu11.1gb 
##              1              1              1              1              1 
##       Hu11.1sb  Hu11.2.biof.7  Hu11.3.biof.7  Hu12.1.biof.8       Hu12.1gb 
##              1              1              1              1              1 
##       Hu12.1sb  Hu12.2.biof.8  Hu12.3.biof.8  Hu13.1.biof.9       Hu13.1gb 
##              1              1              1              1              1 
##       Hu13.1sb  Hu13.2.biof.9  Hu13.3.biof.9       Hu14.1gb       Hu14.1sb 
##              1              1              1              1              1 
## Hu15.1.biof.11 Hu15.2.biof.11 Hu15.3.biof.11       Hu15.3gb       Hu15.3sb 
##              1              1              1              1              1 
## Hu16.1.biof.12 Hu16.2.biof.12 Hu16.3.biof.12       Hu16.3gb       Hu16.3sb 
##              1              1              1              1              1 
##       Hu17.3gb       Hu17.3sb        Hu3.1gb        Hu3.1sb        Hu4.1gb 
##              1              1              1              1              1 
##        Hu4.1sb   Hu5.1.biof.3        Hu5.1gb        Hu5.1sb   Hu5.2.biof.3 
##              1              1              1              1              1 
##   Hu5.3.biof.3        Hu6.1gb        Hu6.1sb   Hu7.1.biof.4        Hu7.1gb 
##              1              1              1              1              1 
##        Hu7.1sb   Hu7.2.biof.4   Hu7.3.biof.4   Hu8.1.biof.5        Hu8.1gb 
##              1              1              1              1              1 
##        Hu8.1sb   Hu8.2.biof.5   Hu8.3.biof.5        Hu9.1gb        Hu9.1sb 
##              1              1              1              1              1 
##      Lo.2.4.GB      Lo.2.5.GB      Lo.2.7.GB      Lo.2.8.GB      Lo.2.9.GB 
##              1              1              1              1              1 
##      Lo.5.1.GM      Lo.5.4.GB      Lo.5.5.GB      Lo.9.2.GB        Lo2.1gb 
##              1              1              1              1              1 
##        Lo2.2gb        Lo2.3gb        Lo4.1gb        Lo4.2gb        Lo8.1gb 
##              1              1              1              1              1 
##        Lo8.2gb        Lo8.3gb        Lo8.4gb        Lo8.5gb       NP.S.1.M 
##              1              1              1              1              1 
##       NP.S.2.M       NP.S.3.M       NP.S.4.B       NP.S.5.B       NP.S.6.B 
##              1              1              1              1              1 
##       NP.W.1.M       NP.W.2.M       NP.W.3.M       NP.W.4.B       NP.W.5.B 
##              1              1              1              1              1 
##       NP.W.6.B     Oy.13.7.GB     Oy.15.4.GM     Oy.15.5.GM     Oy.15.6.GB 
##              1              1              1              1              1 
##     Oy.15.8.GB          R1s1b          R1s2b          R1s3b          R1w1b 
##              1              1              1              1              1 
##          R1w2b          R1w3b          R2s1b          R2s2b          R2s3b 
##              1              1              1              1              1 
##          R2w1b          R2w2b          R2w3b          R3s1b          R3s2b 
##              1              1              1              1              1 
##          R3s3b          R3w1b          R3w2b          R3w3b          R4s1b 
##              1              1              1              1              1 
##          R4s2b          R4s3b          R4w1b          R4w2b          R4w3b 
##              1              1              1              1              1 
##         Rivs1b         Rivs2b         Rivs3b         Rivw1b         Rivw2b 
##              1              1              1              1              1 
##         Rivw3b     Sq.11.1.GM     Sq.11.2.GM     Sq.11.4.GB     Sq.11.5.GB 
##              1              1              1              1              1 
##          U1s1b          U1s2b          U1s3b          U1w1b          U1w2b 
##              1              1              1              1              1 
##          U1w3b          U2s1b          U2s2b          U2s3b          U2w1b 
##              1              1              1              1              1 
##          U2w2b          U2w3b          U3s1b          U3s2b          U3s3b 
##              1              1              1              1              1 
##          U3w1b          U3w2b          U3w3b          U4s1b          U4s2b 
##              1              1              1              1              1 
##          U4s3b          U4w1b          U4w2b          U4w3b 
##              1              1              1              1

Rarefy at minimum sample size (for Alpha diversity analysis)

ps_Nem_rarefy <- phyloseq::rarefy_even_depth(physeq_Nem_5, rngseed = TRUE, replace = FALSE, sample.size = min(rowSums(physeq_Nem_5@otu_table)))

#Check rarefied curves
p01 <- ggrare(ps_Nem_rarefy, step = 100, color = "Biosample_type", se = TRUE)
## rarefying sample BD.S.1.M
## rarefying sample BD.S.2.M
## rarefying sample BD.S.3.M
## rarefying sample BD.S.4.B
## rarefying sample BD.S.5.B
## rarefying sample BD.S.6.B
## rarefying sample BD.W.1.M
## rarefying sample BD.W.2.M
## rarefying sample BD.W.3.M
## rarefying sample BD.W.4.B
## rarefying sample BD.W.5.B
## rarefying sample BD.W.6.B
## rarefying sample Bds1b
## rarefying sample Bds2b
## rarefying sample Bds3b
## rarefying sample Bdw1b
## rarefying sample Bdw2b
## rarefying sample Bdw3b
## rarefying sample BioFilm.21.1
## rarefying sample BioFilm.21.2
## rarefying sample BioFilm.21.3
## rarefying sample CR.S.1.M
## rarefying sample CR.S.2.M
## rarefying sample CR.S.3.M
## rarefying sample CR.S.4.B
## rarefying sample CR.S.5.B
## rarefying sample CR.S.6.B
## rarefying sample CR.W.1.M
## rarefying sample CR.W.2.M
## rarefying sample CR.W.3.M
## rarefying sample CR.W.4.B
## rarefying sample CR.W.5.B
## rarefying sample CR.W.6.B
## rarefying sample F1s1b
## rarefying sample F1s2b
## rarefying sample F1s3b
## rarefying sample F1w1b
## rarefying sample F1w2b
## rarefying sample F1w3b
## rarefying sample F2s1b
## rarefying sample F2s2b
## rarefying sample F2s3b
## rarefying sample F2w1b
## rarefying sample F2w2b
## rarefying sample F2w3b
## rarefying sample F3s1b
## rarefying sample F3s2b
## rarefying sample F3s3b
## rarefying sample F3w1b
## rarefying sample F3w2b
## rarefying sample F3w3b
## rarefying sample F4s1b
## rarefying sample F4s2b
## rarefying sample F4s3b
## rarefying sample F4w1b
## rarefying sample F4w2b
## rarefying sample F4w3b
## rarefying sample Fi.10.3.GM
## rarefying sample Fi.10.4.GB
## rarefying sample Fi.10.5.GB
## rarefying sample Fi.10.6.GB
## rarefying sample Fi.12.10.GB
## rarefying sample Fi.12.11.GB
## rarefying sample Fi.12.13.GB
## rarefying sample Fi.12.4.GB
## rarefying sample Fi.12.5.GB
## rarefying sample Fi.12.6.GB
## rarefying sample Fi.12.7.GB
## rarefying sample Fi.12.8.GB
## rarefying sample Fi.12.9.GB
## rarefying sample Fi.13.1.GM
## rarefying sample Fi.13.2.GB
## rarefying sample Fi.13.3.GB
## rarefying sample Fi.14.2.GM
## rarefying sample Fi.14.5.GB
## rarefying sample Fi.14.6.GB
## rarefying sample Fi.18.1.GB
## rarefying sample Fi.18.2.GB
## rarefying sample Fi.18.3.GB
## rarefying sample Fi.19.1.GB
## rarefying sample Fi.20.1.GB
## rarefying sample Fi.20.2.GB
## rarefying sample Fi.3.6.GB
## rarefying sample Fi.3.7.GB
## rarefying sample Fi.4.4.GB
## rarefying sample Fi.5.3.GB
## rarefying sample Fi.5.4.GB
## rarefying sample Fi.5.5.GB
## rarefying sample Fi.6.4.GB
## rarefying sample Fi.6.5.GB
## rarefying sample Fi.6.6.GM
## rarefying sample Fi.6.7.GB
## rarefying sample Fi.6.8.GB
## rarefying sample Fi.6.9.GB
## rarefying sample Fi10.1gb
## rarefying sample Fi10.2gb
## rarefying sample Fi11.1gb
## rarefying sample Fi11.2gb
## rarefying sample Fi12.1gb
## rarefying sample Fi12.2gb
## rarefying sample Fi12.3gb
## rarefying sample Fi3.1gb
## rarefying sample Fi3.2gb
## rarefying sample Fi3.3gb
## rarefying sample Fi3.4gb
## rarefying sample Fi3.5gb
## rarefying sample Fi5.1gb
## rarefying sample Fi5.2gb
## rarefying sample Fi6.1gb
## rarefying sample Fi6.2gb
## rarefying sample Fi6.3gb
## rarefying sample Fi7.1gb
## rarefying sample Fi7.2gb
## rarefying sample Fi7.3gb
## rarefying sample Fi7.4gb
## rarefying sample Fi7.5gb
## rarefying sample Fo.17.5.BM
## rarefying sample Fo.4.1.BM
## rarefying sample Fo.4.2.BM
## rarefying sample Fo.4.3.BM
## rarefying sample Fo.4.4.BM
## rarefying sample Fo.4.5.BM
## rarefying sample HM.S.1.M
## rarefying sample HM.S.2.M
## rarefying sample HM.S.3.M
## rarefying sample HM.S.4.B
## rarefying sample HM.S.5.B
## rarefying sample HM.S.6.B
## rarefying sample HM.W.1.M
## rarefying sample HM.W.2.M
## rarefying sample HM.W.3.M
## rarefying sample HM.W.4.B
## rarefying sample HM.W.5.B
## rarefying sample HM.W.6.B
## rarefying sample Hu.18.GM
## rarefying sample Hu.18.SM
## rarefying sample Hu.19.GM
## rarefying sample Hu.19.SM
## rarefying sample Hu.20.GM
## rarefying sample Hu.20.SM
## rarefying sample Hu.21.GM
## rarefying sample Hu.21.SM
## rarefying sample Hu.22.GM
## rarefying sample Hu.22.SM
## rarefying sample Hu.23.GM
## rarefying sample Hu.23.SM
## rarefying sample Hu.25.GM
## rarefying sample Hu.25.SM
## rarefying sample Hu.26.GM
## rarefying sample Hu.26.SM
## rarefying sample Hu.27.GM
## rarefying sample Hu.27.SM
## rarefying sample Hu.28.GM
## rarefying sample Hu.28.SM
## rarefying sample Hu.29.GM
## rarefying sample Hu.29.SM
## rarefying sample Hu.30.GM
## rarefying sample Hu.30.SM
## rarefying sample Hu.31.GM
## rarefying sample Hu.31.SM
## rarefying sample Hu.32.GM
## rarefying sample Hu.33.GM
## rarefying sample Hu.34.GM
## rarefying sample Hu.35.GM
## rarefying sample Hu.36.GM
## rarefying sample Hu.36.SM
## rarefying sample Hu.37.GM
## rarefying sample Hu.37.SM
## rarefying sample Hu.38.GM
## rarefying sample Hu.38.SM
## rarefying sample Hu.39.GM
## rarefying sample Hu.39.SM
## rarefying sample Hu.41.GM
## rarefying sample Hu.41.SM
## rarefying sample Hu.43.GM
## rarefying sample Hu.43.SM
## rarefying sample Hu.44.GM
## rarefying sample Hu.44.SM
## rarefying sample Hu.45.GB
## rarefying sample Hu.45.SB
## rarefying sample Hu.46.GB
## rarefying sample Hu.46.SB
## rarefying sample Hu.47.GB
## rarefying sample Hu.47.SB
## rarefying sample Hu.48.GB
## rarefying sample Hu.48.SB
## rarefying sample Hu.49.GB
## rarefying sample Hu.49.SB
## rarefying sample Hu.50.GB
## rarefying sample Hu.50.SB
## rarefying sample Hu.51.GB
## rarefying sample Hu.51.SB
## rarefying sample Hu.52.GB
## rarefying sample Hu.52.SB
## rarefying sample Hu.53.GB
## rarefying sample Hu.53.SB
## rarefying sample Hu.54.GB
## rarefying sample Hu.54.SB
## rarefying sample Hu.55.GB
## rarefying sample Hu.55.SB
## rarefying sample Hu.56.GB
## rarefying sample Hu.56.SB
## rarefying sample Hu.57.GB
## rarefying sample Hu.57.SB
## rarefying sample Hu.58.GB
## rarefying sample Hu.58.SB
## rarefying sample Hu.59.GB
## rarefying sample Hu.59.SB
## rarefying sample Hu.60.GB
## rarefying sample Hu.60.SB
## rarefying sample Hu.61.GB
## rarefying sample Hu.61.SB
## rarefying sample Hu.63.GB
## rarefying sample Hu.63.SB
## rarefying sample Hu.64.GB
## rarefying sample Hu.64.SB
## rarefying sample Hu.66.GB
## rarefying sample Hu.66.SB
## rarefying sample Hu.67.GB
## rarefying sample Hu.67.SB
## rarefying sample Hu.68.GB
## rarefying sample Hu.68.SB
## rarefying sample Hu.69.GB
## rarefying sample Hu.69.SB
## rarefying sample Hu.70.GB
## rarefying sample Hu.70.SB
## rarefying sample Hu.71.GB
## rarefying sample Hu.71.SB
## rarefying sample Hu10.1.biof.6
## rarefying sample Hu10.1gb
## rarefying sample Hu10.1sb
## rarefying sample Hu10.2.biof.6
## rarefying sample Hu10.3.biof.6
## rarefying sample Hu11.1.biof.7
## rarefying sample Hu11.1gb
## rarefying sample Hu11.1sb
## rarefying sample Hu11.2.biof.7
## rarefying sample Hu11.3.biof.7
## rarefying sample Hu12.1.biof.8
## rarefying sample Hu12.1gb
## rarefying sample Hu12.1sb
## rarefying sample Hu12.2.biof.8
## rarefying sample Hu12.3.biof.8
## rarefying sample Hu13.1.biof.9
## rarefying sample Hu13.1gb
## rarefying sample Hu13.1sb
## rarefying sample Hu13.2.biof.9
## rarefying sample Hu13.3.biof.9
## rarefying sample Hu14.1gb
## rarefying sample Hu14.1sb
## rarefying sample Hu15.1.biof.11
## rarefying sample Hu15.2.biof.11
## rarefying sample Hu15.3.biof.11
## rarefying sample Hu15.3gb
## rarefying sample Hu15.3sb
## rarefying sample Hu16.1.biof.12
## rarefying sample Hu16.2.biof.12
## rarefying sample Hu16.3.biof.12
## rarefying sample Hu16.3gb
## rarefying sample Hu16.3sb
## rarefying sample Hu17.3gb
## rarefying sample Hu17.3sb
## rarefying sample Hu3.1gb
## rarefying sample Hu3.1sb
## rarefying sample Hu4.1gb
## rarefying sample Hu4.1sb
## rarefying sample Hu5.1.biof.3
## rarefying sample Hu5.1gb
## rarefying sample Hu5.1sb
## rarefying sample Hu5.2.biof.3
## rarefying sample Hu5.3.biof.3
## rarefying sample Hu6.1gb
## rarefying sample Hu6.1sb
## rarefying sample Hu7.1.biof.4
## rarefying sample Hu7.1gb
## rarefying sample Hu7.1sb
## rarefying sample Hu7.2.biof.4
## rarefying sample Hu7.3.biof.4
## rarefying sample Hu8.1.biof.5
## rarefying sample Hu8.1gb
## rarefying sample Hu8.1sb
## rarefying sample Hu8.2.biof.5
## rarefying sample Hu8.3.biof.5
## rarefying sample Hu9.1gb
## rarefying sample Hu9.1sb
## rarefying sample Lo.2.4.GB
## rarefying sample Lo.2.5.GB
## rarefying sample Lo.2.7.GB
## rarefying sample Lo.2.8.GB
## rarefying sample Lo.2.9.GB
## rarefying sample Lo.5.1.GM
## rarefying sample Lo.5.4.GB
## rarefying sample Lo.5.5.GB
## rarefying sample Lo.9.2.GB
## rarefying sample Lo2.1gb
## rarefying sample Lo2.2gb
## rarefying sample Lo2.3gb
## rarefying sample Lo4.1gb
## rarefying sample Lo4.2gb
## rarefying sample Lo8.1gb
## rarefying sample Lo8.2gb
## rarefying sample Lo8.3gb
## rarefying sample Lo8.4gb
## rarefying sample Lo8.5gb
## rarefying sample NP.S.1.M
## rarefying sample NP.S.2.M
## rarefying sample NP.S.3.M
## rarefying sample NP.S.4.B
## rarefying sample NP.S.5.B
## rarefying sample NP.S.6.B
## rarefying sample NP.W.1.M
## rarefying sample NP.W.2.M
## rarefying sample NP.W.3.M
## rarefying sample NP.W.4.B
## rarefying sample NP.W.5.B
## rarefying sample NP.W.6.B
## rarefying sample Oy.13.7.GB
## rarefying sample Oy.15.4.GM
## rarefying sample Oy.15.5.GM
## rarefying sample Oy.15.6.GB
## rarefying sample Oy.15.8.GB
## rarefying sample R1s1b
## rarefying sample R1s2b
## rarefying sample R1s3b
## rarefying sample R1w1b
## rarefying sample R1w2b
## rarefying sample R1w3b
## rarefying sample R2s1b
## rarefying sample R2s2b
## rarefying sample R2s3b
## rarefying sample R2w1b
## rarefying sample R2w2b
## rarefying sample R2w3b
## rarefying sample R3s1b
## rarefying sample R3s2b
## rarefying sample R3s3b
## rarefying sample R3w1b
## rarefying sample R3w2b
## rarefying sample R3w3b
## rarefying sample R4s1b
## rarefying sample R4s2b
## rarefying sample R4s3b
## rarefying sample R4w1b
## rarefying sample R4w2b
## rarefying sample R4w3b
## rarefying sample Rivs1b
## rarefying sample Rivs2b
## rarefying sample Rivs3b
## rarefying sample Rivw1b
## rarefying sample Rivw2b
## rarefying sample Rivw3b
## rarefying sample Sq.11.1.GM
## rarefying sample Sq.11.2.GM
## rarefying sample Sq.11.4.GB
## rarefying sample Sq.11.5.GB
## rarefying sample U1s1b
## rarefying sample U1s2b
## rarefying sample U1s3b
## rarefying sample U1w1b
## rarefying sample U1w2b
## rarefying sample U1w3b
## rarefying sample U2s1b
## rarefying sample U2s2b
## rarefying sample U2s3b
## rarefying sample U2w1b
## rarefying sample U2w2b
## rarefying sample U2w3b
## rarefying sample U3s1b
## rarefying sample U3s2b
## rarefying sample U3s3b
## rarefying sample U3w1b
## rarefying sample U3w2b
## rarefying sample U3w3b
## rarefying sample U4s1b
## rarefying sample U4s2b
## rarefying sample U4s3b
## rarefying sample U4w1b
## rarefying sample U4w2b
## rarefying sample U4w3b

#check Zhang & Huang's coverage estimator
library(entropart)

Coverage_estimator_rarefied <- Coverage(ps_Nem_rarefy@otu_table, Estimator = "Best", Level = NULL, CheckArguments = TRUE)

Coverage_estimator_rarefied
## ZhangHuang 
##  0.9792839
#Remove unnecessary samples 
#ps_Nem_rarefy <- subset_samples(ps_Nem_rarefy, !(sample_names(ps_Nem_rarefy) %in% c("R1w2b", "JCA.U2S2B", "U1w2b")))


#Check sample size
dim(ps_Nem_rarefy@otu_table)
## [1]   379 70796

PHPB analysis

Introducing the human pathogens list: PathoHuman.txt (of Nem data)

PathoHuman_Nem_TAX1 <- read.table ("/Users/qcvp/R/outputs/dada2_total/asv_table/PathoHuman.txt", sep="\t", row.names=1)

head(PathoHuman_Nem_TAX1, 1)
##                   V2  V3  V4 V5
## ASV1 MG592701.1.1238 100 370  0
##                                                                                                                                                                                                                                                                                                                                                                                      V6
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      V7  V8  V9 V10
## ASV1  1 370 442 811
##                                                                                                               V11
## ASV1 MG592701.1.1238 Bacteria;Bacteroidota;Bacteroidia;Bacteroidales;Prevotellaceae;Prevotella_9;Prevotella copri
##      V12
## ASV1 100

Add names to each column of the table

colnames(PathoHuman_Nem_TAX1) <- c("sseqid", "pident", "length","mismatch", "qseq", 
                                       "qstart", "qend", "sstart","send","stitle", "qcovs")

head(PathoHuman_Nem_TAX1, 1)
##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send
## ASV1      1  370    442  811
##                                                                                                            stitle
## ASV1 MG592701.1.1238 Bacteria;Bacteroidota;Bacteroidia;Bacteroidales;Prevotellaceae;Prevotella_9;Prevotella copri
##      qcovs
## ASV1   100

Separating multiple information column to our desire columns

PathoHuman_Nem_TAX2 <- separate(data = PathoHuman_Nem_TAX1, col = stitle, 
                                    into = c("ACC_Kingdom", "Phylum", 
                                             "Class", "Order", "Family", "Genus_Species"), sep = ";", extra = "merge")

head(PathoHuman_Nem_TAX2, 1)
##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send              ACC_Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family                 Genus_Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella_9;Prevotella copri   100
PathoHuman_Nem_TAX3 <- separate(data = PathoHuman_Nem_TAX2, col = Genus_Species, into = c("Genus", "Species"), sep = ";", extra = "merge")

head(PathoHuman_Nem_TAX3, 1)
##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send              ACC_Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family        Genus          Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella_9 Prevotella copri   100
PathoHuman_Nem_TAX4 <- separate(data = PathoHuman_Nem_TAX3, col = ACC_Kingdom, into = c("ACC", "Kingdom"), sep = "\\s")

head(PathoHuman_Nem_TAX4, 1)
##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send             ACC  Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family        Genus          Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella_9 Prevotella copri   100
PathoHuman_Nem_TAX5 <- separate(data = PathoHuman_Nem_TAX4, col = Species, into = c("Genus", "Species"), sep = " ", extra = "merge" )

head(PathoHuman_Nem_TAX5, 1)
##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send             ACC  Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family      Genus Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella   copri   100
PathoHuman_Nem_TAX6 <- separate(data = PathoHuman_Nem_TAX5, col = Species, into = c("Species", "Suplement_info"), sep =" ", extra = "merge")

head(PathoHuman_Nem_TAX6, 1)
##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send             ACC  Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family      Genus Species Suplement_info qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella   copri           <NA>   100

Filter the bacterial pathogens with coverage score = 100, length >= 239bp

PathoHuman_Nem_TAX_filtered <- PathoHuman_Nem_TAX6[PathoHuman_Nem_TAX6$qcovs == 100 
                                                           & PathoHuman_Nem_TAX6$length > 239 
                                                           & PathoHuman_Nem_TAX6$Kingdom == "Bacteria",]

head(PathoHuman_Nem_TAX_filtered, 1)
##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send             ACC  Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family      Genus Species Suplement_info qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella   copri           <NA>   100

Creating a table with ASV IDs and taxonomy of human pathogens

Patho_Hu_vec <- row.names(PathoHuman_Nem_TAX_filtered)

#Only select taxo cols
Patho_TAX_Hu <- as.matrix(PathoHuman_Nem_TAX_filtered[,c(11:17)])

Patho_TAX_Hu = tax_table(Patho_TAX_Hu)

#Adding ASV sorted by ACC col to new dataframe
row.names(Patho_TAX_Hu) <- Patho_Hu_vec 

head(Patho_TAX_Hu, 5)
## Taxonomy Table:     [5 taxa by 7 taxonomic ranks]:
##       Kingdom    Phylum           Class                 Order             
## ASV1  "Bacteria" "Bacteroidota"   "Bacteroidia"         "Bacteroidales"   
## ASV7  "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Enterobacterales"
## ASV8  "Bacteria" "Firmicutes"     "Bacilli"             "Lactobacillales" 
## ASV10 "Bacteria" "Firmicutes"     "Bacilli"             "Staphylococcales"
## ASV13 "Bacteria" "Firmicutes"     "Clostridia"          "Oscillospirales" 
##       Family              Genus              Species      
## ASV1  "Prevotellaceae"    "Prevotella"       "copri"      
## ASV7  "Vibrionaceae"      "Photobacterium"   "damselae"   
## ASV8  "Listeriaceae"      "Listeria"         "welshimeri" 
## ASV10 "Staphylococcaceae" "Staphylococcus"   "warneri"    
## ASV13 "Ruminococcaceae"   "Faecalibacterium" "prausnitzii"

Extract sample for longitudinal paper

Extracting samples

# Extract samples names
longitudinal_sample <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/metadata_longitudinal.txt", header = TRUE, row.names = 1, sep="\t")
rownames(longitudinal_sample) <- gsub("-",".",rownames(longitudinal_sample))
longitudinal_sample_names <- rownames(longitudinal_sample)
# Absolute abundance
physeq_Nem_long <- subset_samples(physeq_Nem_5, sample_names(physeq_Nem_5) %in% longitudinal_sample_names)
physeq_Nem_long
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 109183 taxa and 48 samples ]
## sample_data() Sample Data:       [ 48 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 109183 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 109183 reference sequences ]
# Relative abundance 
physeq_Nem_long_rel <- subset_samples(physeq_Nem_rel, sample_names(physeq_Nem_rel) %in% longitudinal_sample_names)
physeq_Nem_long_rel
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 109183 taxa and 48 samples ]
## sample_data() Sample Data:       [ 48 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 109183 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 109183 reference sequences ]

Selecting the Pathogens in the datasets

Unrarefied samples for beta div

Raw dataset

Nem_raw_ASV <- data.frame(physeq_Nem_long@otu_table@.Data)
Nem_raw_hu_patho_ASV <- Nem_raw_ASV[,colnames(Nem_raw_ASV) %in% Patho_Hu_vec]

# Create new physeq object
Nem_hu_unrarefied <- phyloseq(otu_table(Nem_raw_hu_patho_ASV, taxa_are_rows = FALSE),
                       sample_data(context),
                       tax_table(as.matrix(Patho_TAX_Hu)))
Nem_hu_unrarefied
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 600 taxa and 48 samples ]
## sample_data() Sample Data:       [ 48 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 600 taxa by 7 taxonomic ranks ]

Relative dataset

Nem_raw_unrarefied_ASV_rel <- data.frame(physeq_Nem_long_rel@otu_table@.Data)
Nem_raw_unrarefied_hu_patho_ASV_rel <- Nem_raw_unrarefied_ASV_rel

#Create new physeq object
Nem_hu_unrarefied_rel <- phyloseq(otu_table(Nem_raw_unrarefied_hu_patho_ASV_rel, taxa_are_rows = FALSE),
                       sample_data(context),
                       tax_table(as.matrix(Patho_TAX_Hu)))
Nem_hu_unrarefied_rel
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 600 taxa and 48 samples ]
## sample_data() Sample Data:       [ 48 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 600 taxa by 7 taxonomic ranks ]

Rarefied samples for alphadiv

Bacterial composition analysis

Prepare data for bacterial composition analysis

# Melting absolute abundance
Melted_Nem_hu_unrarefied_species <- Nem_hu_unrarefied %>%
  tax_glom(taxrank = "Species") %>%                     # agglomerate at rank level
  psmelt() %>%                                         # Melt to long format
  filter(Abundance > 0) %>%                            # Filter out absent taxa reads
  arrange(Species)                                      # Sort data frame alphabetically by family

# Melting Relative Abundance
Melted_Nem_hu_unrarefied_species_rel <- Nem_hu_unrarefied_rel %>%
  tax_glom(taxrank = "Species") %>%                     # agglomerate at rank level
  psmelt() %>%                                         # Melt to long format    
  filter(Abundance > 0) %>%                            # Filter out absent taxa reads
  arrange(Species)       

# Adding pathogen column for pie chart
#physeq_whole_Nem_species$Pathogen_type <- ifelse(physeq_whole_Nem_species$OTU %in% Patho_Hu_vec, physeq_whole_Nem_species$Biosample, "Other")
#physeq_whole_Nem_species$Pathogen <- ifelse(physeq_whole_Nem_species$OTU %in% Patho_Hu_vec, physeq_whole_Nem_species$Class, "Other")

Extrat samples occur in each part of Venn diagram

# Sample occur in each Biosample
animal_sample <- subset(Melted_Nem_hu_unrarefied_species, Biosample == "Animals")
Envi_sample <- subset(Melted_Nem_hu_unrarefied_species, Biosample == "Environment")
Human_sample <- subset(Melted_Nem_hu_unrarefied_species, Biosample == "Human")

#Extract list of ASV for each components
list_ASV_all <- Reduce(intersect, list(animal_sample$OTU, Envi_sample$OTU, Human_sample$OTU)) #ASV occur in all biosample types
list_ASV_all
##  [1] "ASV1"   "ASV145" "ASV7"   "ASV383" "ASV182" "ASV121" "ASV263" "ASV107"
##  [9] "ASV279" "ASV261" "ASV260" "ASV115" "ASV112" "ASV40"  "ASV13"  "ASV48" 
## [17] "ASV74"  "ASV32"  "ASV391" "ASV108" "ASV18"  "ASV106" "ASV10"  "ASV110"
## [25] "ASV663"
list_ASV_EA <- setdiff(intersect(Envi_sample$OTU,animal_sample$OTU), list_ASV_all) #ASV occur in both Environment and animal but not in Human
list_ASV_EA 
## character(0)
list_ASV_EH <- setdiff(intersect(Envi_sample$OTU,Human_sample$OTU), list_ASV_all) #ASV occur in both Environment and human but not in animal
list_ASV_EH
##  [1] "ASV28"   "ASV172"  "ASV245"  "ASV309"  "ASV1192" "ASV2582" "ASV185" 
##  [8] "ASV811"  "ASV348"  "ASV38"   "ASV2330" "ASV720"  "ASV1706" "ASV2156"
## [15] "ASV817"  "ASV844"  "ASV9796" "ASV1271"
list_ASV_AH <- setdiff(intersect(animal_sample$OTU,Human_sample$OTU), list_ASV_all) #ASV occur in both human and animal but not in environment
list_ASV_AH
##  [1] "ASV1276"  "ASV72"    "ASV57955" "ASV431"   "ASV496"   "ASV191"  
##  [7] "ASV7760"  "ASV1190"  "ASV950"   "ASV223"   "ASV522"   "ASV1842" 
## [13] "ASV737"   "ASV70"    "ASV9460"
list_ASV_A <- Reduce(setdiff, list(animal_sample$OTU, list_ASV_EA, list_ASV_AH, list_ASV_all)) # ASV occur in only Animal
list_ASV_A
##  [1] "ASV73"    "ASV4092"  "ASV24639" "ASV2427"  "ASV6340"  "ASV738"  
##  [7] "ASV547"   "ASV943"   "ASV6423"  "ASV9725"  "ASV679"   "ASV1533" 
## [13] "ASV23547" "ASV613"   "ASV53236" "ASV640"
list_ASV_H <- Reduce(setdiff, list(Human_sample$OTU, list_ASV_EH, list_ASV_AH, list_ASV_all)) # ASV occur in only Human
list_ASV_H    
##  [1] "ASV3220"  "ASV316"   "ASV1088"  "ASV4028"  "ASV1334"  "ASV7426" 
##  [7] "ASV6858"  "ASV5245"  "ASV426"   "ASV7427"  "ASV6337"  "ASV2934" 
## [13] "ASV4500"  "ASV6368"  "ASV1740"  "ASV167"   "ASV783"   "ASV6880" 
## [19] "ASV1991"  "ASV5244"  "ASV1349"  "ASV2167"  "ASV1539"  "ASV398"  
## [25] "ASV16681" "ASV2815"  "ASV3654"  "ASV265"   "ASV2983"  "ASV2070" 
## [31] "ASV612"   "ASV10033" "ASV826"   "ASV1640"  "ASV9787"  "ASV2524" 
## [37] "ASV2790"  "ASV14047" "ASV4272"  "ASV3456"  "ASV7610"  "ASV2092" 
## [43] "ASV1237"  "ASV1391"  "ASV53743" "ASV2996"  "ASV23530" "ASV6711" 
## [49] "ASV718"   "ASV5981"  "ASV2893"  "ASV32069" "ASV6765"  "ASV6175" 
## [55] "ASV1582"  "ASV1050"  "ASV22648" "ASV411"   "ASV1177"  "ASV15653"
## [61] "ASV13593" "ASV3237"  "ASV2062"  "ASV8852"  "ASV342"
list_ASV_E <- Reduce(setdiff, list(Envi_sample$OTU, list_ASV_EH, list_ASV_EA, list_ASV_all)) # ASV occur in only Environment
list_ASV_E
##  [1] "ASV5193"  "ASV8778"  "ASV5532"  "ASV2891"  "ASV123"   "ASV1435" 
##  [7] "ASV16429" "ASV2649"  "ASV5502"  "ASV11514" "ASV34362" "ASV2204" 
## [13] "ASV14165"
# Checking
length(unique(Melted_Nem_hu_unrarefied_species$OTU))
## [1] 152
length(list_ASV_E) + length(list_ASV_H) + length(list_ASV_A) + length(list_ASV_all) + length(list_ASV_EA) + length(list_ASV_EH) + length(list_ASV_AH)
## [1] 152

Composition of human pathogenic ASV

Relative abundance in all biosample

# Extract samples reads and add metadata
longitudinan_sample_rel <- as.data.frame(rowSums(Nem_hu_unrarefied_rel@otu_table@.Data))
longitudinan_sample_rel <- cbind(longitudinan_sample_rel, physeq_Nem_long@sam_data)

custom_colors <- c( "#CBD588", "#5F7FC7","#DA5724", "#508578", "#CD9BCD",
                    "#AD6F3B", "#673770","#D14285", "#652926", "#C84248", 
                    "#8569D5", "#5E738F","#D1A33D", "#8A7C64", "#599861", "black", "red", "green")

a <- ggplot(longitudinan_sample_rel, aes(x = Biosample, y = rowSums(Nem_hu_unrarefied_rel@otu_table@.Data), color = Biosample)) + 
  geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3, aes(colour = Biosample), show.legend = TRUE) + theme_bw() +
  geom_boxplot(aes(colour = Biosample), alpha=0.1, outlier.colour = NA) + 
   theme(axis.title.x = element_blank(),
        legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 18),
        axis.title.y = element_text(size = 20),
        axis.text.y = element_text(size=20),
        axis.text.x = element_blank()) + 
  ylab("Relative Abundance") +   
  facet_wrap(~Date, nrow=1, scales = "free_x") + scale_fill_manual(values = custom_colors) 
a

Calculate sample reads

# prepare data for bacterial composition
all_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_all)
EA_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_EA)
EH_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_EH)
AH_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_AH)
A_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_A)
E_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_E)
H_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_H)
# calculate read
colSums(as.data.frame(all_sample[,3])) 
## all_sample[, 3] 
##          510804
colSums(as.data.frame(E_sample[,3])) 
## E_sample[, 3] 
##           165
colSums(as.data.frame(EA_sample[,3])) 
## EA_sample[, 3] 
##              0
colSums(as.data.frame(EH_sample[,3])) 
## EH_sample[, 3] 
##          31450
colSums(as.data.frame(AH_sample[,3])) 
## AH_sample[, 3] 
##          27393
colSums(as.data.frame(H_sample[,3])) 
## H_sample[, 3] 
##         12466
colSums(as.data.frame(A_sample[,3])) 
## A_sample[, 3] 
##          2474

Compositional distribution of pathogenic ASV (venn diagram)

# Number of ASV
x <- list(A = animal_sample$OTU, B = Envi_sample$OTU, C = Human_sample$OTU)
c1 <- ggVennDiagram(x, 
              category.names = c("Animal","Environment","Human"),
              label = "count",) +
              theme(legend.position = "none") +
              scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
c1

Bacterial composition of each biosample(animal, human, environment) compositional bar chart

# Composition for whole community
c1 <- ggplot(Melted_Nem_hu_unrarefied_species_rel, aes(x = Biosample, fill = Class)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c1

#Composition for ASV in Enviroment and Animal

#Composition for ASV in Enviroment and Human
c2 <- ggplot(EH_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  xlab("ASV in Environment and Human only")+
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c2

#Composition for ASV in Human and Animal
c3 <- ggplot(AH_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  xlab("ASV in Human and Animal only")+
  theme(legend.text = element_text(face = "italic")) +
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c3

#Composition for ASV in Enviroment only
c4 <- ggplot(E_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  xlab("ASV in Environment only")+
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c4

#Composition for ASV in Animal only
c5 <- ggplot(A_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  xlab("ASV in Animal only")+
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c5

#Composition for ASV in Human only
c6 <- ggplot(H_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  xlab("ASV in Human only")+
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c6

#Composition for ASV in all 3 biosamples
c7 <- ggplot(all_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  xlab("ASV appear in all biosamples")+
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c7