Custom scripts

Before analyzing, run the following codes: ## OTU to matrix

otu.matrix <- function(ps, force.taxa.cols = TRUE) {
  mat <- as(phyloseq::otu_table(ps), "matrix")
  if (taxa_are_rows(ps) & force.taxa.cols) {
    mat <- t(mat)
  }
  return(mat)
}

Scripts of `ggrare`

ggrare <- function(physeq, step = 10, label = NULL, color = NULL,
                   plot = TRUE, parallel = FALSE, se = TRUE) {
  
  require("ggplot2")
  
  x <- as(phyloseq::otu_table(physeq), "matrix")
  if (phyloseq::taxa_are_rows(physeq)) x <- t(x)
  ## This script is adapted from vegan `rarecurve` function
  tot <- rowSums(x)
  S <- rowSums(x > 0)
  nr <- nrow(x)
  
  rarefun <- function(i) {
    cat(paste("rarefying sample", rownames(x)[i]), sep = "\n")
    n <- seq(1, tot[i], by = step)
    if (n[length(n)] != tot[i]) {
      n <- c(n, tot[i])
    }
    y <- vegan::rarefy(x[i, ,drop = FALSE], n, se = se)
    if (nrow(y) != 1) {
      rownames(y) <- c(".S", ".se")
      return(data.frame(t(y), Size = n, Sample = rownames(x)[i]))
    } else {
      return(data.frame(.S = y[1, ], Size = n, Sample = rownames(x)[i]))
    }
  }
  
  if (parallel) {
    out <- parallel::mclapply(seq_len(nr), rarefun, mc.preschedule = FALSE)
  } else {
    out <- lapply(seq_len(nr), rarefun)
  }
  
  df <- do.call(rbind, out)
  
  ## Get sample data 
  if (!is.null(phyloseq::sample_data(physeq, FALSE))) {
    sdf <- as(phyloseq::sample_data(physeq), "data.frame")
    sdf$Sample <- rownames(sdf)
    data <- merge(df, sdf, by = "Sample")
    labels <- data.frame(x = tot, y = S, Sample = rownames(x))
    labels <- merge(labels, sdf, by = "Sample")
  }
  
  ## Add, any custom-supplied plot-mapped variables
  if (length(color) > 1) {
    data$color <- color
    names(data)[names(data) == "color"] <- deparse(substitute(color))
    color <- deparse(substitute(color))
  }
  
  if (length(label) > 1) {
    labels$label <- label
    names(labels)[names(labels) == "label"] <- deparse(substitute(label))
    label <- deparse(substitute(label))
  }
  
  p <- ggplot(data = data,
              aes_string(x = "Size", y = ".S",
                         group = "Sample", color = color)) +
    labs(x = "Sample Size", y = "Species Richness")
  
  if (!is.null(label)) {
    p <- p + geom_text(data = labels,
                       aes_string(x = "x", y = "y",
                                  label = label, color = color),
                       size = 4, hjust = 0)
  }
  
  p <- p + geom_line()
  
  if (se) { ## add standard error if available
    p <- p +
      geom_ribbon(aes_string(ymin = ".S - .se", ymax = ".S + .se",
                             color = NULL, fill = color),
                  alpha = 0.2)
  }
  
  if (plot) {
    plot(p)
  }
  
  invisible(p)
  
}

#' Check for normal distribution of alpha diversity measurements
#'
#'
#' @return Print ggplot rarefaction curves
#' @param rich A dataframe with alpha diversity indices as columns
#' and samples as rows as the output of pyloseq::estimate_richness()
#' @param file Output file name

indices_normality <- function(rich, nrow, ncol) {
  
  ### p-value < 0.05 means data failed normality test
  
  par(mfrow = c(nrow, ncol))
  
  for (i in names(rich)) {
    shap <- shapiro.test(rich[, i])
    qqnorm(rich[, i], main = i, sub = shap$p.value)
    qqline(rich[, i])
  }
  
  par(mfrow = c(1, 1))
}

Library loading

library("seqinr")
library(Biostrings)
library(dplyr)
library(phyloseq)
library(writexl)
library(tidyr)
library(ggplot2)
library(readr)
library(ggpubr)
library(reshape)
library(microbiomeMarker)
library(ANCOMBC)
library(tidyverse)
library(DT)
library(ggVennDiagram)
library(VennDiagram)

Load context

Importing ASV table of Nem

asv_table <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/asv_table.tsv", sep="\t", row.names=1, header=T)

Invert the ASV table in case sample IDs are on row not column

asv_inverted<-as.data.frame(t(asv_table))

Load the ASV sequences (previously created with DADA2)

asv_seq <- readDNAStringSet(file = "/Users/qcvp/R/outputs/dada2_total/asv_table/asv.fasta", 
                            format = "fasta",
                            nrec = -1L, 
                            skip = 0L, 
                            seek.first.rec = FALSE, 
                            use.names = TRUE)
head(asv_seq, 1)

## DNAStringSet object of length 1:
##     width seq                                               names               
## [1]   370 TACGGAAGGTCCGGGCGTTATCC...TGGGGAGTACGCCGGCAACGGTG ASV1

Load asv tax of Nem

asv_tax <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/taxonomy.tsv", header = TRUE, row.names = 1, sep="\t")
asv_tax <- as.matrix(asv_tax)

head(asv_tax, 1)

##      Kingdom    Phylum         Class         Order           Family          
## ASV1 "Bacteria" "Bacteroidota" "Bacteroidia" "Bacteroidales" "Prevotellaceae"
##      Genus         
## ASV1 "Prevotella_9"

Import additional the metadata information of Nem

context <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/metadata.txt", header = TRUE, row.names = 1, sep="\t")
rownames(context) <- gsub("-",".",rownames(context))
head(context)

##          Sequencing     Date Transect Station   Biosample Biosample_type
## BD.S.1.M Metagenome Jul_2023 Bich_Dam      BD Environment       Sediment
## BD.S.2.M Metagenome Jul_2023 Bich_Dam      BD Environment       Sediment
## BD.S.3.M Metagenome Jul_2023 Bich_Dam      BD Environment       Sediment
## BD.S.4.B  Barcoding Dec_2023 Bich_Dam      BD Environment       Sediment
## BD.S.5.B  Barcoding Dec_2023 Bich_Dam      BD Environment       Sediment
## BD.S.6.B  Barcoding Dec_2023 Bich_Dam      BD Environment       Sediment
##          Biosample_cluster   Host_type Host_subtype Host_genus Host_species
## BD.S.1.M          Sediment Environment     Sediment       <NA>         <NA>
## BD.S.2.M          Sediment Environment     Sediment       <NA>         <NA>
## BD.S.3.M          Sediment Environment     Sediment       <NA>         <NA>
## BD.S.4.B          Sediment Environment     Sediment       <NA>         <NA>
## BD.S.5.B          Sediment Environment     Sediment       <NA>         <NA>
## BD.S.6.B          Sediment Environment     Sediment       <NA>         <NA>
##          Species_ID
## BD.S.1.M       <NA>
## BD.S.2.M       <NA>
## BD.S.3.M       <NA>
## BD.S.4.B       <NA>
## BD.S.5.B       <NA>
## BD.S.6.B       <NA>

#Checking samples IDs uniformity 
setdiff(x = row.names(asv_inverted), y = row.names(context))

## character(0)

Making raw phyloseq objects

physeq_Nem <- phyloseq(
  otu_table(asv_inverted, taxa_are_rows = F), #if you have taxa on rows = T
  sample_data(context),
  refseq(asv_seq),
  tax_table(asv_tax)
)
physeq_Nem

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 111454 taxa and 412 samples ]
## sample_data() Sample Data:       [ 412 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 111454 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 111454 reference sequences ]

Decontamination process

Remove MOCK and Control samples from the phyloseq object

physeq_Nem_1 <- subset_samples(physeq_Nem, !Biosample =="Mock")
physeq_Nem_1

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 111454 taxa and 412 samples ]
## sample_data() Sample Data:       [ 412 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 111454 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 111454 reference sequences ]

physeq_Nem_2 <- subset_samples(physeq_Nem_1, Biosample != "Control")
physeq_Nem_2

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 111454 taxa and 403 samples ]
## sample_data() Sample Data:       [ 403 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 111454 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 111454 reference sequences ]

Quick checking library size of each Biomsaple types

df <- as.data.frame(sample_data(physeq_Nem_2))  # Put sample_data into a ggplot-friendly data.frame
df$LibrarySize <- sample_sums(physeq_Nem_2)
df <- df[order(df$LibrarySize),]
df$Index <- seq(nrow(df))
ggplot(data=df, aes(x=Index, y=LibrarySize, color=as.factor(Biosample))) + 
  geom_point() + geom_text(aes(label=Index), hjust=2, nudge_y = 0.05)

Identification of contaminants - prevalence

Remove comtaminant ASVs

physeq_Nem_3 <- subset_taxa(physeq_Nem_2, !taxa_names(physeq_Nem) %in% c("ASV2", "ASV8", "ASV52", "ASV97", "ASV128", "ASV318", "ASV356", "ASV471", "ASV708", "ASV914", "ASV1101", "ASV1127", "ASV1915", "ASV1952", "ASV2892", "ASV3517", "ASV4859", "ASV9739", "ASV26448", "ASV30253"))
physeq_Nem_3

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 111434 taxa and 403 samples ]
## sample_data() Sample Data:       [ 403 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 111434 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 111434 reference sequences ]

Known kit Contaminant detection

contaminant_genera <- read.table("/Users/qcvp/R/Contaminant.txt", header = T)
a <- contaminant_genera[, ]
physeq_Nem_4 <- subset_taxa(physeq_Nem_3, !Genus %in% a)
physeq_Nem_4

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 109183 taxa and 403 samples ]
## sample_data() Sample Data:       [ 403 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 109183 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 109183 reference sequences ]

Removing samples with < 5000 reads

physeq_Nem_5 <- subset_samples(physeq_Nem_4, !sample_names(physeq_Nem_4) %in% c("Fi.4.3.GM", "Hu.35.SM","Sq.11.3.GB", "Fi.4.1.GB", "Hu.40.SM", "Oy.13.3.GM", "Fi.14.1.GM", "Hu.33.SM", "Hu.34.SM", "Hu.32.SM", "Fo.10.4.BM", "Fo.10.3.BM", "Fo.17.3.BM", "Fo.15.3.BM", "Fo.17.2.BM", "Oy.13.2.GB", "Fo.17.1.BM", "Fo.15.1.BM", "Fi.17.8.GM", "Fo.15.2.BM", "Oy.15.7.GB", "Lo.9.1.GB", "Lo.5.3.GB", "Lo.2.6.GB"))
physeq_Nem_5

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 109183 taxa and 379 samples ]
## sample_data() Sample Data:       [ 379 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 109183 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 109183 reference sequences ]

Checking data

max(rowSums(physeq_Nem_5@otu_table))

## [1] 79551

min(rowSums(physeq_Nem_5@otu_table))

## [1] 6723

mean(rowSums(physeq_Nem_5@otu_table))

## [1] 41402.22

Plotting bar plot of reads

# Extract row sum of physeq_Nemt_3
OTU_asmatrix_3 <- otu.matrix(physeq_Nem_3, force.taxa.cols = TRUE)
sample_reads_3 <- rowSums(OTU_asmatrix_3)

# Extract row sum of physeq_Nem_4
OTU_asmatrix_4 <- otu.matrix(physeq_Nem_4, force.taxa.cols = TRUE)
sample_reads_4 <- rowSums(OTU_asmatrix_4)

#Combine
sample_reads_total <- cbind(sample_reads_4, sample_reads_3)


# Load data
Abs_abun_arg <- sample_reads_total
# Reshape the data to long format
  Abs_abun_arg_long <- melt(Abs_abun_arg, id.vars = "Sample.name", variable.name = "Runs", value.name = "Reads")

## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE
## Warning in type.convert.default(X[[i]], ...): 'as.is' should be specified by
## the caller; using TRUE

  Biosample_type <- physeq_Nem_4@sam_data$Biosample_type
  Abs_abun_arg_long <- cbind( Abs_abun_arg_long, Biosample_type)

#Calculate errors
Abs_abun_summary <- Abs_abun_arg_long %>%
  group_by(Biosample_type, X2) %>%
  summarise(
    Mean = mean(value, na.rm = TRUE),
    SD = sd(value, na.rm = TRUE))
Abs_abun_summary

## # A tibble: 12 × 4
## # Groups:   Biosample_type [6]
##    Biosample_type X2               Mean     SD
##    <chr>          <fct>           <dbl>  <dbl>
##  1 Biofilm        sample_reads_4 44370. 15625.
##  2 Biofilm        sample_reads_3 48793. 12809.
##  3 Gut            sample_reads_4 40302. 17142.
##  4 Gut            sample_reads_3 41953. 17625.
##  5 Sediment       sample_reads_4 31652. 12182.
##  6 Sediment       sample_reads_3 31951. 12768.
##  7 Skin           sample_reads_4 41408. 17116.
##  8 Skin           sample_reads_3 46206. 17859.
##  9 Water          sample_reads_4 45188. 10212.
## 10 Water          sample_reads_3 45539. 10277.
## 11 Wholebody      sample_reads_4 19216. 19883.
## 12 Wholebody      sample_reads_3 20532. 20549.

# Plot using ggplot2
# Plot for sediment data
ggplot(Abs_abun_summary, aes(x = Biosample_type, y = Mean, fill = X2)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin =  Mean-SD, ymax = Mean+SD), width = 0.2, position = position_dodge(0.9)) +
  theme_minimal() +
  theme(legend.text = element_text(size = 15),
        legend.position = "top",
        plot.title = element_text(size = 20, face = "bold"),
        legend.title = element_text(size = 18),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 18),
        axis.text.y = element_text(size = 15),
        strip.text = element_text(size = 15),
        axis.text.x = element_text(angle = 45, colour = "black", vjust = 1, hjust = 1, size = 15)) +
  labs(x = "biosample type", y = "number of read") +
  facet_grid( scales = "free")

Calculate relative abundance

#Tax 
whole_bacteria_ASV_TAX <- data.frame(physeq_Nem_5@tax_table)
ID <- rownames(whole_bacteria_ASV_TAX)
whole_bacteria_ASV_TAX <- cbind(ID=ID, whole_bacteria_ASV_TAX)

#Dataset with Relative abundance
physeq_Nem_rel <- transform_sample_counts(physeq_Nem_5, function(x) x / sum(x) )
rowSums(physeq_Nem_rel@otu_table@.Data)

##       BD.S.1.M       BD.S.2.M       BD.S.3.M       BD.S.4.B       BD.S.5.B 
##              1              1              1              1              1 
##       BD.S.6.B       BD.W.1.M       BD.W.2.M       BD.W.3.M       BD.W.4.B 
##              1              1              1              1              1 
##       BD.W.5.B       BD.W.6.B          Bds1b          Bds2b          Bds3b 
##              1              1              1              1              1 
##          Bdw1b          Bdw2b          Bdw3b   BioFilm.21.1   BioFilm.21.2 
##              1              1              1              1              1 
##   BioFilm.21.3       CR.S.1.M       CR.S.2.M       CR.S.3.M       CR.S.4.B 
##              1              1              1              1              1 
##       CR.S.5.B       CR.S.6.B       CR.W.1.M       CR.W.2.M       CR.W.3.M 
##              1              1              1              1              1 
##       CR.W.4.B       CR.W.5.B       CR.W.6.B          F1s1b          F1s2b 
##              1              1              1              1              1 
##          F1s3b          F1w1b          F1w2b          F1w3b          F2s1b 
##              1              1              1              1              1 
##          F2s2b          F2s3b          F2w1b          F2w2b          F2w3b 
##              1              1              1              1              1 
##          F3s1b          F3s2b          F3s3b          F3w1b          F3w2b 
##              1              1              1              1              1 
##          F3w3b          F4s1b          F4s2b          F4s3b          F4w1b 
##              1              1              1              1              1 
##          F4w2b          F4w3b     Fi.10.3.GM     Fi.10.4.GB     Fi.10.5.GB 
##              1              1              1              1              1 
##     Fi.10.6.GB    Fi.12.10.GB    Fi.12.11.GB    Fi.12.13.GB     Fi.12.4.GB 
##              1              1              1              1              1 
##     Fi.12.5.GB     Fi.12.6.GB     Fi.12.7.GB     Fi.12.8.GB     Fi.12.9.GB 
##              1              1              1              1              1 
##     Fi.13.1.GM     Fi.13.2.GB     Fi.13.3.GB     Fi.14.2.GM     Fi.14.5.GB 
##              1              1              1              1              1 
##     Fi.14.6.GB     Fi.18.1.GB     Fi.18.2.GB     Fi.18.3.GB     Fi.19.1.GB 
##              1              1              1              1              1 
##     Fi.20.1.GB     Fi.20.2.GB      Fi.3.6.GB      Fi.3.7.GB      Fi.4.4.GB 
##              1              1              1              1              1 
##      Fi.5.3.GB      Fi.5.4.GB      Fi.5.5.GB      Fi.6.4.GB      Fi.6.5.GB 
##              1              1              1              1              1 
##      Fi.6.6.GM      Fi.6.7.GB      Fi.6.8.GB      Fi.6.9.GB       Fi10.1gb 
##              1              1              1              1              1 
##       Fi10.2gb       Fi11.1gb       Fi11.2gb       Fi12.1gb       Fi12.2gb 
##              1              1              1              1              1 
##       Fi12.3gb        Fi3.1gb        Fi3.2gb        Fi3.3gb        Fi3.4gb 
##              1              1              1              1              1 
##        Fi3.5gb        Fi5.1gb        Fi5.2gb        Fi6.1gb        Fi6.2gb 
##              1              1              1              1              1 
##        Fi6.3gb        Fi7.1gb        Fi7.2gb        Fi7.3gb        Fi7.4gb 
##              1              1              1              1              1 
##        Fi7.5gb     Fo.17.5.BM      Fo.4.1.BM      Fo.4.2.BM      Fo.4.3.BM 
##              1              1              1              1              1 
##      Fo.4.4.BM      Fo.4.5.BM       HM.S.1.M       HM.S.2.M       HM.S.3.M 
##              1              1              1              1              1 
##       HM.S.4.B       HM.S.5.B       HM.S.6.B       HM.W.1.M       HM.W.2.M 
##              1              1              1              1              1 
##       HM.W.3.M       HM.W.4.B       HM.W.5.B       HM.W.6.B       Hu.18.GM 
##              1              1              1              1              1 
##       Hu.18.SM       Hu.19.GM       Hu.19.SM       Hu.20.GM       Hu.20.SM 
##              1              1              1              1              1 
##       Hu.21.GM       Hu.21.SM       Hu.22.GM       Hu.22.SM       Hu.23.GM 
##              1              1              1              1              1 
##       Hu.23.SM       Hu.25.GM       Hu.25.SM       Hu.26.GM       Hu.26.SM 
##              1              1              1              1              1 
##       Hu.27.GM       Hu.27.SM       Hu.28.GM       Hu.28.SM       Hu.29.GM 
##              1              1              1              1              1 
##       Hu.29.SM       Hu.30.GM       Hu.30.SM       Hu.31.GM       Hu.31.SM 
##              1              1              1              1              1 
##       Hu.32.GM       Hu.33.GM       Hu.34.GM       Hu.35.GM       Hu.36.GM 
##              1              1              1              1              1 
##       Hu.36.SM       Hu.37.GM       Hu.37.SM       Hu.38.GM       Hu.38.SM 
##              1              1              1              1              1 
##       Hu.39.GM       Hu.39.SM       Hu.41.GM       Hu.41.SM       Hu.43.GM 
##              1              1              1              1              1 
##       Hu.43.SM       Hu.44.GM       Hu.44.SM       Hu.45.GB       Hu.45.SB 
##              1              1              1              1              1 
##       Hu.46.GB       Hu.46.SB       Hu.47.GB       Hu.47.SB       Hu.48.GB 
##              1              1              1              1              1 
##       Hu.48.SB       Hu.49.GB       Hu.49.SB       Hu.50.GB       Hu.50.SB 
##              1              1              1              1              1 
##       Hu.51.GB       Hu.51.SB       Hu.52.GB       Hu.52.SB       Hu.53.GB 
##              1              1              1              1              1 
##       Hu.53.SB       Hu.54.GB       Hu.54.SB       Hu.55.GB       Hu.55.SB 
##              1              1              1              1              1 
##       Hu.56.GB       Hu.56.SB       Hu.57.GB       Hu.57.SB       Hu.58.GB 
##              1              1              1              1              1 
##       Hu.58.SB       Hu.59.GB       Hu.59.SB       Hu.60.GB       Hu.60.SB 
##              1              1              1              1              1 
##       Hu.61.GB       Hu.61.SB       Hu.63.GB       Hu.63.SB       Hu.64.GB 
##              1              1              1              1              1 
##       Hu.64.SB       Hu.66.GB       Hu.66.SB       Hu.67.GB       Hu.67.SB 
##              1              1              1              1              1 
##       Hu.68.GB       Hu.68.SB       Hu.69.GB       Hu.69.SB       Hu.70.GB 
##              1              1              1              1              1 
##       Hu.70.SB       Hu.71.GB       Hu.71.SB  Hu10.1.biof.6       Hu10.1gb 
##              1              1              1              1              1 
##       Hu10.1sb  Hu10.2.biof.6  Hu10.3.biof.6  Hu11.1.biof.7       Hu11.1gb 
##              1              1              1              1              1 
##       Hu11.1sb  Hu11.2.biof.7  Hu11.3.biof.7  Hu12.1.biof.8       Hu12.1gb 
##              1              1              1              1              1 
##       Hu12.1sb  Hu12.2.biof.8  Hu12.3.biof.8  Hu13.1.biof.9       Hu13.1gb 
##              1              1              1              1              1 
##       Hu13.1sb  Hu13.2.biof.9  Hu13.3.biof.9       Hu14.1gb       Hu14.1sb 
##              1              1              1              1              1 
## Hu15.1.biof.11 Hu15.2.biof.11 Hu15.3.biof.11       Hu15.3gb       Hu15.3sb 
##              1              1              1              1              1 
## Hu16.1.biof.12 Hu16.2.biof.12 Hu16.3.biof.12       Hu16.3gb       Hu16.3sb 
##              1              1              1              1              1 
##       Hu17.3gb       Hu17.3sb        Hu3.1gb        Hu3.1sb        Hu4.1gb 
##              1              1              1              1              1 
##        Hu4.1sb   Hu5.1.biof.3        Hu5.1gb        Hu5.1sb   Hu5.2.biof.3 
##              1              1              1              1              1 
##   Hu5.3.biof.3        Hu6.1gb        Hu6.1sb   Hu7.1.biof.4        Hu7.1gb 
##              1              1              1              1              1 
##        Hu7.1sb   Hu7.2.biof.4   Hu7.3.biof.4   Hu8.1.biof.5        Hu8.1gb 
##              1              1              1              1              1 
##        Hu8.1sb   Hu8.2.biof.5   Hu8.3.biof.5        Hu9.1gb        Hu9.1sb 
##              1              1              1              1              1 
##      Lo.2.4.GB      Lo.2.5.GB      Lo.2.7.GB      Lo.2.8.GB      Lo.2.9.GB 
##              1              1              1              1              1 
##      Lo.5.1.GM      Lo.5.4.GB      Lo.5.5.GB      Lo.9.2.GB        Lo2.1gb 
##              1              1              1              1              1 
##        Lo2.2gb        Lo2.3gb        Lo4.1gb        Lo4.2gb        Lo8.1gb 
##              1              1              1              1              1 
##        Lo8.2gb        Lo8.3gb        Lo8.4gb        Lo8.5gb       NP.S.1.M 
##              1              1              1              1              1 
##       NP.S.2.M       NP.S.3.M       NP.S.4.B       NP.S.5.B       NP.S.6.B 
##              1              1              1              1              1 
##       NP.W.1.M       NP.W.2.M       NP.W.3.M       NP.W.4.B       NP.W.5.B 
##              1              1              1              1              1 
##       NP.W.6.B     Oy.13.7.GB     Oy.15.4.GM     Oy.15.5.GM     Oy.15.6.GB 
##              1              1              1              1              1 
##     Oy.15.8.GB          R1s1b          R1s2b          R1s3b          R1w1b 
##              1              1              1              1              1 
##          R1w2b          R1w3b          R2s1b          R2s2b          R2s3b 
##              1              1              1              1              1 
##          R2w1b          R2w2b          R2w3b          R3s1b          R3s2b 
##              1              1              1              1              1 
##          R3s3b          R3w1b          R3w2b          R3w3b          R4s1b 
##              1              1              1              1              1 
##          R4s2b          R4s3b          R4w1b          R4w2b          R4w3b 
##              1              1              1              1              1 
##         Rivs1b         Rivs2b         Rivs3b         Rivw1b         Rivw2b 
##              1              1              1              1              1 
##         Rivw3b     Sq.11.1.GM     Sq.11.2.GM     Sq.11.4.GB     Sq.11.5.GB 
##              1              1              1              1              1 
##          U1s1b          U1s2b          U1s3b          U1w1b          U1w2b 
##              1              1              1              1              1 
##          U1w3b          U2s1b          U2s2b          U2s3b          U2w1b 
##              1              1              1              1              1 
##          U2w2b          U2w3b          U3s1b          U3s2b          U3s3b 
##              1              1              1              1              1 
##          U3w1b          U3w2b          U3w3b          U4s1b          U4s2b 
##              1              1              1              1              1 
##          U4s3b          U4w1b          U4w2b          U4w3b 
##              1              1              1              1

Rarefy at minimum sample size (for Alpha diversity analysis)

ps_Nem_rarefy <- phyloseq::rarefy_even_depth(physeq_Nem_5, rngseed = TRUE, replace = FALSE, sample.size = min(rowSums(physeq_Nem_5@otu_table)))

#Check rarefied curves
p01 <- ggrare(ps_Nem_rarefy, step = 100, color = "Biosample_type", se = TRUE)

## rarefying sample BD.S.1.M
## rarefying sample BD.S.2.M
## rarefying sample BD.S.3.M
## rarefying sample BD.S.4.B
## rarefying sample BD.S.5.B
## rarefying sample BD.S.6.B
## rarefying sample BD.W.1.M
## rarefying sample BD.W.2.M
## rarefying sample BD.W.3.M
## rarefying sample BD.W.4.B
## rarefying sample BD.W.5.B
## rarefying sample BD.W.6.B
## rarefying sample Bds1b
## rarefying sample Bds2b
## rarefying sample Bds3b
## rarefying sample Bdw1b
## rarefying sample Bdw2b
## rarefying sample Bdw3b
## rarefying sample BioFilm.21.1
## rarefying sample BioFilm.21.2
## rarefying sample BioFilm.21.3
## rarefying sample CR.S.1.M
## rarefying sample CR.S.2.M
## rarefying sample CR.S.3.M
## rarefying sample CR.S.4.B
## rarefying sample CR.S.5.B
## rarefying sample CR.S.6.B
## rarefying sample CR.W.1.M
## rarefying sample CR.W.2.M
## rarefying sample CR.W.3.M
## rarefying sample CR.W.4.B
## rarefying sample CR.W.5.B
## rarefying sample CR.W.6.B
## rarefying sample F1s1b
## rarefying sample F1s2b
## rarefying sample F1s3b
## rarefying sample F1w1b
## rarefying sample F1w2b
## rarefying sample F1w3b
## rarefying sample F2s1b
## rarefying sample F2s2b
## rarefying sample F2s3b
## rarefying sample F2w1b
## rarefying sample F2w2b
## rarefying sample F2w3b
## rarefying sample F3s1b
## rarefying sample F3s2b
## rarefying sample F3s3b
## rarefying sample F3w1b
## rarefying sample F3w2b
## rarefying sample F3w3b
## rarefying sample F4s1b
## rarefying sample F4s2b
## rarefying sample F4s3b
## rarefying sample F4w1b
## rarefying sample F4w2b
## rarefying sample F4w3b
## rarefying sample Fi.10.3.GM
## rarefying sample Fi.10.4.GB
## rarefying sample Fi.10.5.GB
## rarefying sample Fi.10.6.GB
## rarefying sample Fi.12.10.GB
## rarefying sample Fi.12.11.GB
## rarefying sample Fi.12.13.GB
## rarefying sample Fi.12.4.GB
## rarefying sample Fi.12.5.GB
## rarefying sample Fi.12.6.GB
## rarefying sample Fi.12.7.GB
## rarefying sample Fi.12.8.GB
## rarefying sample Fi.12.9.GB
## rarefying sample Fi.13.1.GM
## rarefying sample Fi.13.2.GB
## rarefying sample Fi.13.3.GB
## rarefying sample Fi.14.2.GM
## rarefying sample Fi.14.5.GB
## rarefying sample Fi.14.6.GB
## rarefying sample Fi.18.1.GB
## rarefying sample Fi.18.2.GB
## rarefying sample Fi.18.3.GB
## rarefying sample Fi.19.1.GB
## rarefying sample Fi.20.1.GB
## rarefying sample Fi.20.2.GB
## rarefying sample Fi.3.6.GB
## rarefying sample Fi.3.7.GB
## rarefying sample Fi.4.4.GB
## rarefying sample Fi.5.3.GB
## rarefying sample Fi.5.4.GB
## rarefying sample Fi.5.5.GB
## rarefying sample Fi.6.4.GB
## rarefying sample Fi.6.5.GB
## rarefying sample Fi.6.6.GM
## rarefying sample Fi.6.7.GB
## rarefying sample Fi.6.8.GB
## rarefying sample Fi.6.9.GB
## rarefying sample Fi10.1gb
## rarefying sample Fi10.2gb
## rarefying sample Fi11.1gb
## rarefying sample Fi11.2gb
## rarefying sample Fi12.1gb
## rarefying sample Fi12.2gb
## rarefying sample Fi12.3gb
## rarefying sample Fi3.1gb
## rarefying sample Fi3.2gb
## rarefying sample Fi3.3gb
## rarefying sample Fi3.4gb
## rarefying sample Fi3.5gb
## rarefying sample Fi5.1gb

## rarefying sample Fi5.2gb
## rarefying sample Fi6.1gb
## rarefying sample Fi6.2gb
## rarefying sample Fi6.3gb
## rarefying sample Fi7.1gb
## rarefying sample Fi7.2gb
## rarefying sample Fi7.3gb
## rarefying sample Fi7.4gb
## rarefying sample Fi7.5gb
## rarefying sample Fo.17.5.BM
## rarefying sample Fo.4.1.BM
## rarefying sample Fo.4.2.BM
## rarefying sample Fo.4.3.BM
## rarefying sample Fo.4.4.BM
## rarefying sample Fo.4.5.BM
## rarefying sample HM.S.1.M
## rarefying sample HM.S.2.M
## rarefying sample HM.S.3.M
## rarefying sample HM.S.4.B
## rarefying sample HM.S.5.B
## rarefying sample HM.S.6.B
## rarefying sample HM.W.1.M
## rarefying sample HM.W.2.M
## rarefying sample HM.W.3.M
## rarefying sample HM.W.4.B
## rarefying sample HM.W.5.B
## rarefying sample HM.W.6.B
## rarefying sample Hu.18.GM
## rarefying sample Hu.18.SM
## rarefying sample Hu.19.GM
## rarefying sample Hu.19.SM
## rarefying sample Hu.20.GM
## rarefying sample Hu.20.SM
## rarefying sample Hu.21.GM
## rarefying sample Hu.21.SM
## rarefying sample Hu.22.GM
## rarefying sample Hu.22.SM
## rarefying sample Hu.23.GM
## rarefying sample Hu.23.SM
## rarefying sample Hu.25.GM
## rarefying sample Hu.25.SM
## rarefying sample Hu.26.GM
## rarefying sample Hu.26.SM
## rarefying sample Hu.27.GM
## rarefying sample Hu.27.SM
## rarefying sample Hu.28.GM
## rarefying sample Hu.28.SM
## rarefying sample Hu.29.GM
## rarefying sample Hu.29.SM
## rarefying sample Hu.30.GM
## rarefying sample Hu.30.SM
## rarefying sample Hu.31.GM
## rarefying sample Hu.31.SM
## rarefying sample Hu.32.GM
## rarefying sample Hu.33.GM
## rarefying sample Hu.34.GM
## rarefying sample Hu.35.GM
## rarefying sample Hu.36.GM
## rarefying sample Hu.36.SM
## rarefying sample Hu.37.GM
## rarefying sample Hu.37.SM

## rarefying sample Hu.38.GM
## rarefying sample Hu.38.SM
## rarefying sample Hu.39.GM
## rarefying sample Hu.39.SM
## rarefying sample Hu.41.GM
## rarefying sample Hu.41.SM
## rarefying sample Hu.43.GM
## rarefying sample Hu.43.SM
## rarefying sample Hu.44.GM
## rarefying sample Hu.44.SM
## rarefying sample Hu.45.GB
## rarefying sample Hu.45.SB
## rarefying sample Hu.46.GB
## rarefying sample Hu.46.SB
## rarefying sample Hu.47.GB
## rarefying sample Hu.47.SB
## rarefying sample Hu.48.GB
## rarefying sample Hu.48.SB
## rarefying sample Hu.49.GB
## rarefying sample Hu.49.SB
## rarefying sample Hu.50.GB
## rarefying sample Hu.50.SB
## rarefying sample Hu.51.GB
## rarefying sample Hu.51.SB
## rarefying sample Hu.52.GB
## rarefying sample Hu.52.SB
## rarefying sample Hu.53.GB
## rarefying sample Hu.53.SB
## rarefying sample Hu.54.GB
## rarefying sample Hu.54.SB
## rarefying sample Hu.55.GB
## rarefying sample Hu.55.SB
## rarefying sample Hu.56.GB
## rarefying sample Hu.56.SB
## rarefying sample Hu.57.GB
## rarefying sample Hu.57.SB
## rarefying sample Hu.58.GB
## rarefying sample Hu.58.SB
## rarefying sample Hu.59.GB
## rarefying sample Hu.59.SB
## rarefying sample Hu.60.GB
## rarefying sample Hu.60.SB
## rarefying sample Hu.61.GB
## rarefying sample Hu.61.SB
## rarefying sample Hu.63.GB
## rarefying sample Hu.63.SB
## rarefying sample Hu.64.GB
## rarefying sample Hu.64.SB
## rarefying sample Hu.66.GB
## rarefying sample Hu.66.SB
## rarefying sample Hu.67.GB
## rarefying sample Hu.67.SB
## rarefying sample Hu.68.GB
## rarefying sample Hu.68.SB
## rarefying sample Hu.69.GB
## rarefying sample Hu.69.SB
## rarefying sample Hu.70.GB
## rarefying sample Hu.70.SB
## rarefying sample Hu.71.GB
## rarefying sample Hu.71.SB
## rarefying sample Hu10.1.biof.6
## rarefying sample Hu10.1gb
## rarefying sample Hu10.1sb
## rarefying sample Hu10.2.biof.6
## rarefying sample Hu10.3.biof.6
## rarefying sample Hu11.1.biof.7
## rarefying sample Hu11.1gb
## rarefying sample Hu11.1sb
## rarefying sample Hu11.2.biof.7
## rarefying sample Hu11.3.biof.7
## rarefying sample Hu12.1.biof.8
## rarefying sample Hu12.1gb
## rarefying sample Hu12.1sb
## rarefying sample Hu12.2.biof.8
## rarefying sample Hu12.3.biof.8
## rarefying sample Hu13.1.biof.9
## rarefying sample Hu13.1gb
## rarefying sample Hu13.1sb
## rarefying sample Hu13.2.biof.9
## rarefying sample Hu13.3.biof.9
## rarefying sample Hu14.1gb
## rarefying sample Hu14.1sb
## rarefying sample Hu15.1.biof.11
## rarefying sample Hu15.2.biof.11
## rarefying sample Hu15.3.biof.11
## rarefying sample Hu15.3gb
## rarefying sample Hu15.3sb
## rarefying sample Hu16.1.biof.12
## rarefying sample Hu16.2.biof.12
## rarefying sample Hu16.3.biof.12
## rarefying sample Hu16.3gb
## rarefying sample Hu16.3sb
## rarefying sample Hu17.3gb
## rarefying sample Hu17.3sb
## rarefying sample Hu3.1gb
## rarefying sample Hu3.1sb
## rarefying sample Hu4.1gb
## rarefying sample Hu4.1sb
## rarefying sample Hu5.1.biof.3
## rarefying sample Hu5.1gb
## rarefying sample Hu5.1sb
## rarefying sample Hu5.2.biof.3
## rarefying sample Hu5.3.biof.3
## rarefying sample Hu6.1gb
## rarefying sample Hu6.1sb
## rarefying sample Hu7.1.biof.4
## rarefying sample Hu7.1gb
## rarefying sample Hu7.1sb
## rarefying sample Hu7.2.biof.4
## rarefying sample Hu7.3.biof.4
## rarefying sample Hu8.1.biof.5
## rarefying sample Hu8.1gb
## rarefying sample Hu8.1sb
## rarefying sample Hu8.2.biof.5
## rarefying sample Hu8.3.biof.5
## rarefying sample Hu9.1gb
## rarefying sample Hu9.1sb
## rarefying sample Lo.2.4.GB
## rarefying sample Lo.2.5.GB
## rarefying sample Lo.2.7.GB
## rarefying sample Lo.2.8.GB

## rarefying sample Lo.2.9.GB
## rarefying sample Lo.5.1.GM
## rarefying sample Lo.5.4.GB
## rarefying sample Lo.5.5.GB

## rarefying sample Lo.9.2.GB

## rarefying sample Lo2.1gb
## rarefying sample Lo2.2gb
## rarefying sample Lo2.3gb
## rarefying sample Lo4.1gb
## rarefying sample Lo4.2gb
## rarefying sample Lo8.1gb
## rarefying sample Lo8.2gb
## rarefying sample Lo8.3gb
## rarefying sample Lo8.4gb
## rarefying sample Lo8.5gb
## rarefying sample NP.S.1.M
## rarefying sample NP.S.2.M
## rarefying sample NP.S.3.M
## rarefying sample NP.S.4.B
## rarefying sample NP.S.5.B
## rarefying sample NP.S.6.B
## rarefying sample NP.W.1.M
## rarefying sample NP.W.2.M
## rarefying sample NP.W.3.M
## rarefying sample NP.W.4.B
## rarefying sample NP.W.5.B
## rarefying sample NP.W.6.B
## rarefying sample Oy.13.7.GB
## rarefying sample Oy.15.4.GM
## rarefying sample Oy.15.5.GM
## rarefying sample Oy.15.6.GB
## rarefying sample Oy.15.8.GB
## rarefying sample R1s1b
## rarefying sample R1s2b
## rarefying sample R1s3b
## rarefying sample R1w1b
## rarefying sample R1w2b
## rarefying sample R1w3b
## rarefying sample R2s1b
## rarefying sample R2s2b
## rarefying sample R2s3b
## rarefying sample R2w1b
## rarefying sample R2w2b
## rarefying sample R2w3b
## rarefying sample R3s1b
## rarefying sample R3s2b
## rarefying sample R3s3b
## rarefying sample R3w1b
## rarefying sample R3w2b
## rarefying sample R3w3b
## rarefying sample R4s1b
## rarefying sample R4s2b
## rarefying sample R4s3b
## rarefying sample R4w1b
## rarefying sample R4w2b
## rarefying sample R4w3b
## rarefying sample Rivs1b
## rarefying sample Rivs2b
## rarefying sample Rivs3b
## rarefying sample Rivw1b
## rarefying sample Rivw2b
## rarefying sample Rivw3b
## rarefying sample Sq.11.1.GM
## rarefying sample Sq.11.2.GM
## rarefying sample Sq.11.4.GB
## rarefying sample Sq.11.5.GB
## rarefying sample U1s1b
## rarefying sample U1s2b
## rarefying sample U1s3b
## rarefying sample U1w1b
## rarefying sample U1w2b
## rarefying sample U1w3b
## rarefying sample U2s1b
## rarefying sample U2s2b
## rarefying sample U2s3b
## rarefying sample U2w1b
## rarefying sample U2w2b
## rarefying sample U2w3b
## rarefying sample U3s1b
## rarefying sample U3s2b
## rarefying sample U3s3b
## rarefying sample U3w1b
## rarefying sample U3w2b
## rarefying sample U3w3b
## rarefying sample U4s1b
## rarefying sample U4s2b
## rarefying sample U4s3b
## rarefying sample U4w1b
## rarefying sample U4w2b
## rarefying sample U4w3b

#check Zhang & Huang's coverage estimator
library(entropart)

Coverage_estimator_rarefied <- Coverage(ps_Nem_rarefy@otu_table, Estimator = "Best", Level = NULL, CheckArguments = TRUE)

Coverage_estimator_rarefied

## ZhangHuang 
##  0.9792839

#Remove unnecessary samples 
#ps_Nem_rarefy <- subset_samples(ps_Nem_rarefy, !(sample_names(ps_Nem_rarefy) %in% c("R1w2b", "JCA.U2S2B", "U1w2b")))


#Check sample size
dim(ps_Nem_rarefy@otu_table)

## [1]   379 70796

PHPB analysis

Introducing the human pathogens list: `PathoHuman.txt` (of Nem data)

PathoHuman_Nem_TAX1 <- read.table ("/Users/qcvp/R/outputs/dada2_total/asv_table/PathoHuman.txt", sep="\t", row.names=1)

head(PathoHuman_Nem_TAX1, 1)

##                   V2  V3  V4 V5
## ASV1 MG592701.1.1238 100 370  0
##                                                                                                                                                                                                                                                                                                                                                                                      V6
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      V7  V8  V9 V10
## ASV1  1 370 442 811
##                                                                                                               V11
## ASV1 MG592701.1.1238 Bacteria;Bacteroidota;Bacteroidia;Bacteroidales;Prevotellaceae;Prevotella_9;Prevotella copri
##      V12
## ASV1 100

Add names to each column of the table

colnames(PathoHuman_Nem_TAX1) <- c("sseqid", "pident", "length","mismatch", "qseq", 
                                       "qstart", "qend", "sstart","send","stitle", "qcovs")

head(PathoHuman_Nem_TAX1, 1)

##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send
## ASV1      1  370    442  811
##                                                                                                            stitle
## ASV1 MG592701.1.1238 Bacteria;Bacteroidota;Bacteroidia;Bacteroidales;Prevotellaceae;Prevotella_9;Prevotella copri
##      qcovs
## ASV1   100

Separating multiple information column to our desire columns

PathoHuman_Nem_TAX2 <- separate(data = PathoHuman_Nem_TAX1, col = stitle, 
                                    into = c("ACC_Kingdom", "Phylum", 
                                             "Class", "Order", "Family", "Genus_Species"), sep = ";", extra = "merge")

head(PathoHuman_Nem_TAX2, 1)

##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send              ACC_Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family                 Genus_Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella_9;Prevotella copri   100

PathoHuman_Nem_TAX3 <- separate(data = PathoHuman_Nem_TAX2, col = Genus_Species, into = c("Genus", "Species"), sep = ";", extra = "merge")

head(PathoHuman_Nem_TAX3, 1)

##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send              ACC_Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family        Genus          Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella_9 Prevotella copri   100

PathoHuman_Nem_TAX4 <- separate(data = PathoHuman_Nem_TAX3, col = ACC_Kingdom, into = c("ACC", "Kingdom"), sep = "\\s")

head(PathoHuman_Nem_TAX4, 1)

##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send             ACC  Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family        Genus          Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella_9 Prevotella copri   100

PathoHuman_Nem_TAX5 <- separate(data = PathoHuman_Nem_TAX4, col = Species, into = c("Genus", "Species"), sep = " ", extra = "merge" )

head(PathoHuman_Nem_TAX5, 1)

##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send             ACC  Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family      Genus Species qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella   copri   100

PathoHuman_Nem_TAX6 <- separate(data = PathoHuman_Nem_TAX5, col = Species, into = c("Species", "Suplement_info"), sep =" ", extra = "merge")

head(PathoHuman_Nem_TAX6, 1)

##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send             ACC  Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family      Genus Species Suplement_info qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella   copri           <NA>   100

Filter the bacterial pathogens with coverage score = 100, length >= 239bp

PathoHuman_Nem_TAX_filtered <- PathoHuman_Nem_TAX6[PathoHuman_Nem_TAX6$qcovs == 100 
                                                           & PathoHuman_Nem_TAX6$length > 239 
                                                           & PathoHuman_Nem_TAX6$Kingdom == "Bacteria",]

head(PathoHuman_Nem_TAX_filtered, 1)

##               sseqid pident length mismatch
## ASV1 MG592701.1.1238    100    370        0
##                                                                                                                                                                                                                                                                                                                                                                                    qseq
## ASV1 TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGACGCTCAACGTCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCACAAAGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGAGCGCAACTGACGCTGAAGCTCGAAAGTGCGGGTATCGAACAGGATTAGATACCCTGGTAGTCCGCACGGTAAACGATGGATGCCCGCTGTTGGTCTGAATAGGTCAGCGGCCAAGCGAAAGCATTAAGCATCCCACCTGGGGAGTACGCCGGCAACGGTG
##      qstart qend sstart send             ACC  Kingdom       Phylum       Class
## ASV1      1  370    442  811 MG592701.1.1238 Bacteria Bacteroidota Bacteroidia
##              Order         Family      Genus Species Suplement_info qcovs
## ASV1 Bacteroidales Prevotellaceae Prevotella   copri           <NA>   100

Creating a table with ASV IDs and taxonomy of human pathogens

Patho_Hu_vec <- row.names(PathoHuman_Nem_TAX_filtered)

#Only select taxo cols
Patho_TAX_Hu <- as.matrix(PathoHuman_Nem_TAX_filtered[,c(11:17)])

Patho_TAX_Hu = tax_table(Patho_TAX_Hu)

#Adding ASV sorted by ACC col to new dataframe
row.names(Patho_TAX_Hu) <- Patho_Hu_vec 

head(Patho_TAX_Hu, 5)

## Taxonomy Table:     [5 taxa by 7 taxonomic ranks]:
##       Kingdom    Phylum           Class                 Order             
## ASV1  "Bacteria" "Bacteroidota"   "Bacteroidia"         "Bacteroidales"   
## ASV7  "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Enterobacterales"
## ASV8  "Bacteria" "Firmicutes"     "Bacilli"             "Lactobacillales" 
## ASV10 "Bacteria" "Firmicutes"     "Bacilli"             "Staphylococcales"
## ASV13 "Bacteria" "Firmicutes"     "Clostridia"          "Oscillospirales" 
##       Family              Genus              Species      
## ASV1  "Prevotellaceae"    "Prevotella"       "copri"      
## ASV7  "Vibrionaceae"      "Photobacterium"   "damselae"   
## ASV8  "Listeriaceae"      "Listeria"         "welshimeri" 
## ASV10 "Staphylococcaceae" "Staphylococcus"   "warneri"    
## ASV13 "Ruminococcaceae"   "Faecalibacterium" "prausnitzii"

Extract sample for longitudinal paper

Extracting samples

# Extract samples names
longitudinal_sample <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/metadata_longitudinal.txt", header = TRUE, row.names = 1, sep="\t")
rownames(longitudinal_sample) <- gsub("-",".",rownames(longitudinal_sample))
longitudinal_sample_names <- rownames(longitudinal_sample)
# Absolute abundance
physeq_Nem_long <- subset_samples(physeq_Nem_5, sample_names(physeq_Nem_5) %in% longitudinal_sample_names)
physeq_Nem_long

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 109183 taxa and 48 samples ]
## sample_data() Sample Data:       [ 48 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 109183 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 109183 reference sequences ]

# Relative abundance 
physeq_Nem_long_rel <- subset_samples(physeq_Nem_rel, sample_names(physeq_Nem_rel) %in% longitudinal_sample_names)
physeq_Nem_long_rel

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 109183 taxa and 48 samples ]
## sample_data() Sample Data:       [ 48 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 109183 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 109183 reference sequences ]

Selecting the Pathogens in the datasets

Unrarefied samples for beta div

Raw dataset

Nem_raw_ASV <- data.frame(physeq_Nem_long@otu_table@.Data)
Nem_raw_hu_patho_ASV <- Nem_raw_ASV[,colnames(Nem_raw_ASV) %in% Patho_Hu_vec]

# Create new physeq object
Nem_hu_unrarefied <- phyloseq(otu_table(Nem_raw_hu_patho_ASV, taxa_are_rows = FALSE),
                       sample_data(context),
                       tax_table(as.matrix(Patho_TAX_Hu)))
Nem_hu_unrarefied

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 600 taxa and 48 samples ]
## sample_data() Sample Data:       [ 48 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 600 taxa by 7 taxonomic ranks ]

Relative dataset

Nem_raw_unrarefied_ASV_rel <- data.frame(physeq_Nem_long_rel@otu_table@.Data)
Nem_raw_unrarefied_hu_patho_ASV_rel <- Nem_raw_unrarefied_ASV_rel

#Create new physeq object
Nem_hu_unrarefied_rel <- phyloseq(otu_table(Nem_raw_unrarefied_hu_patho_ASV_rel, taxa_are_rows = FALSE),
                       sample_data(context),
                       tax_table(as.matrix(Patho_TAX_Hu)))
Nem_hu_unrarefied_rel

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 600 taxa and 48 samples ]
## sample_data() Sample Data:       [ 48 samples by 12 sample variables ]
## tax_table()   Taxonomy Table:    [ 600 taxa by 7 taxonomic ranks ]

Rarefied samples for alphadiv

Bacterial composition analysis

Prepare data for bacterial composition analysis

# Melting absolute abundance
Melted_Nem_hu_unrarefied_species <- Nem_hu_unrarefied %>%
  tax_glom(taxrank = "Species") %>%                     # agglomerate at rank level
  psmelt() %>%                                         # Melt to long format
  filter(Abundance > 0) %>%                            # Filter out absent taxa reads
  arrange(Species)                                      # Sort data frame alphabetically by family

# Melting Relative Abundance
Melted_Nem_hu_unrarefied_species_rel <- Nem_hu_unrarefied_rel %>%
  tax_glom(taxrank = "Species") %>%                     # agglomerate at rank level
  psmelt() %>%                                         # Melt to long format    
  filter(Abundance > 0) %>%                            # Filter out absent taxa reads
  arrange(Species)       

# Adding pathogen column for pie chart
#physeq_whole_Nem_species$Pathogen_type <- ifelse(physeq_whole_Nem_species$OTU %in% Patho_Hu_vec, physeq_whole_Nem_species$Biosample, "Other")
#physeq_whole_Nem_species$Pathogen <- ifelse(physeq_whole_Nem_species$OTU %in% Patho_Hu_vec, physeq_whole_Nem_species$Class, "Other")

Extrat samples occur in each part of Venn diagram

# Sample occur in each Biosample
animal_sample <- subset(Melted_Nem_hu_unrarefied_species, Biosample == "Animals")
Envi_sample <- subset(Melted_Nem_hu_unrarefied_species, Biosample == "Environment")
Human_sample <- subset(Melted_Nem_hu_unrarefied_species, Biosample == "Human")

#Extract list of ASV for each components
list_ASV_all <- Reduce(intersect, list(animal_sample$OTU, Envi_sample$OTU, Human_sample$OTU)) #ASV occur in all biosample types
list_ASV_all

##  [1] "ASV1"   "ASV145" "ASV7"   "ASV383" "ASV182" "ASV121" "ASV263" "ASV107"
##  [9] "ASV279" "ASV261" "ASV260" "ASV115" "ASV112" "ASV40"  "ASV13"  "ASV48" 
## [17] "ASV74"  "ASV32"  "ASV391" "ASV108" "ASV18"  "ASV106" "ASV10"  "ASV110"
## [25] "ASV663"

list_ASV_EA <- setdiff(intersect(Envi_sample$OTU,animal_sample$OTU), list_ASV_all) #ASV occur in both Environment and animal but not in Human
list_ASV_EA

## character(0)

list_ASV_EH <- setdiff(intersect(Envi_sample$OTU,Human_sample$OTU), list_ASV_all) #ASV occur in both Environment and human but not in animal
list_ASV_EH

##  [1] "ASV28"   "ASV172"  "ASV245"  "ASV309"  "ASV1192" "ASV2582" "ASV185" 
##  [8] "ASV811"  "ASV348"  "ASV38"   "ASV2330" "ASV720"  "ASV1706" "ASV2156"
## [15] "ASV817"  "ASV844"  "ASV9796" "ASV1271"

list_ASV_AH <- setdiff(intersect(animal_sample$OTU,Human_sample$OTU), list_ASV_all) #ASV occur in both human and animal but not in environment
list_ASV_AH

##  [1] "ASV1276"  "ASV72"    "ASV57955" "ASV431"   "ASV496"   "ASV191"  
##  [7] "ASV7760"  "ASV1190"  "ASV950"   "ASV223"   "ASV522"   "ASV1842" 
## [13] "ASV737"   "ASV70"    "ASV9460"

list_ASV_A <- Reduce(setdiff, list(animal_sample$OTU, list_ASV_EA, list_ASV_AH, list_ASV_all)) # ASV occur in only Animal
list_ASV_A

##  [1] "ASV73"    "ASV4092"  "ASV24639" "ASV2427"  "ASV6340"  "ASV738"  
##  [7] "ASV547"   "ASV943"   "ASV6423"  "ASV9725"  "ASV679"   "ASV1533" 
## [13] "ASV23547" "ASV613"   "ASV53236" "ASV640"

list_ASV_H <- Reduce(setdiff, list(Human_sample$OTU, list_ASV_EH, list_ASV_AH, list_ASV_all)) # ASV occur in only Human
list_ASV_H

##  [1] "ASV3220"  "ASV316"   "ASV1088"  "ASV4028"  "ASV1334"  "ASV7426" 
##  [7] "ASV6858"  "ASV5245"  "ASV426"   "ASV7427"  "ASV6337"  "ASV2934" 
## [13] "ASV4500"  "ASV6368"  "ASV1740"  "ASV167"   "ASV783"   "ASV6880" 
## [19] "ASV1991"  "ASV5244"  "ASV1349"  "ASV2167"  "ASV1539"  "ASV398"  
## [25] "ASV16681" "ASV2815"  "ASV3654"  "ASV265"   "ASV2983"  "ASV2070" 
## [31] "ASV612"   "ASV10033" "ASV826"   "ASV1640"  "ASV9787"  "ASV2524" 
## [37] "ASV2790"  "ASV14047" "ASV4272"  "ASV3456"  "ASV7610"  "ASV2092" 
## [43] "ASV1237"  "ASV1391"  "ASV53743" "ASV2996"  "ASV23530" "ASV6711" 
## [49] "ASV718"   "ASV5981"  "ASV2893"  "ASV32069" "ASV6765"  "ASV6175" 
## [55] "ASV1582"  "ASV1050"  "ASV22648" "ASV411"   "ASV1177"  "ASV15653"
## [61] "ASV13593" "ASV3237"  "ASV2062"  "ASV8852"  "ASV342"

list_ASV_E <- Reduce(setdiff, list(Envi_sample$OTU, list_ASV_EH, list_ASV_EA, list_ASV_all)) # ASV occur in only Environment
list_ASV_E

##  [1] "ASV5193"  "ASV8778"  "ASV5532"  "ASV2891"  "ASV123"   "ASV1435" 
##  [7] "ASV16429" "ASV2649"  "ASV5502"  "ASV11514" "ASV34362" "ASV2204" 
## [13] "ASV14165"

# Checking
length(unique(Melted_Nem_hu_unrarefied_species$OTU))

## [1] 152

length(list_ASV_E) + length(list_ASV_H) + length(list_ASV_A) + length(list_ASV_all) + length(list_ASV_EA) + length(list_ASV_EH) + length(list_ASV_AH)

## [1] 152

Composition of human pathogenic ASV

Relative abundance in all biosample

# Extract samples reads and add metadata
longitudinan_sample_rel <- as.data.frame(rowSums(Nem_hu_unrarefied_rel@otu_table@.Data))
longitudinan_sample_rel <- cbind(longitudinan_sample_rel, physeq_Nem_long@sam_data)

custom_colors <- c( "#CBD588", "#5F7FC7","#DA5724", "#508578", "#CD9BCD",
                    "#AD6F3B", "#673770","#D14285", "#652926", "#C84248", 
                    "#8569D5", "#5E738F","#D1A33D", "#8A7C64", "#599861", "black", "red", "green")

a <- ggplot(longitudinan_sample_rel, aes(x = Biosample, y = rowSums(Nem_hu_unrarefied_rel@otu_table@.Data), color = Biosample)) + 
  geom_jitter(position = position_jitter(width = .20), alpha = 0.5, size = 3, aes(colour = Biosample), show.legend = TRUE) + theme_bw() +
  geom_boxplot(aes(colour = Biosample), alpha=0.1, outlier.colour = NA) + 
   theme(axis.title.x = element_blank(),
        legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = 13),
        strip.text = element_text(size = 18),
        axis.title.y = element_text(size = 20),
        axis.text.y = element_text(size=20),
        axis.text.x = element_blank()) + 
  ylab("Relative Abundance") +   
  facet_wrap(~Date, nrow=1, scales = "free_x") + scale_fill_manual(values = custom_colors) 
a

Calculate sample reads

# prepare data for bacterial composition
all_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_all)
EA_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_EA)
EH_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_EH)
AH_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_AH)
A_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_A)
E_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_E)
H_sample <- subset(Melted_Nem_hu_unrarefied_species, OTU %in% list_ASV_H)
# calculate read
colSums(as.data.frame(all_sample[,3]))

## all_sample[, 3] 
##          510804

colSums(as.data.frame(E_sample[,3]))

## E_sample[, 3] 
##           165

colSums(as.data.frame(EA_sample[,3]))

## EA_sample[, 3] 
##              0

colSums(as.data.frame(EH_sample[,3]))

## EH_sample[, 3] 
##          31450

colSums(as.data.frame(AH_sample[,3]))

## AH_sample[, 3] 
##          27393

colSums(as.data.frame(H_sample[,3]))

## H_sample[, 3] 
##         12466

colSums(as.data.frame(A_sample[,3]))

## A_sample[, 3] 
##          2474

Compositional distribution of pathogenic ASV (venn diagram)

# Number of ASV
x <- list(A = animal_sample$OTU, B = Envi_sample$OTU, C = Human_sample$OTU)
c1 <- ggVennDiagram(x, 
              category.names = c("Animal","Environment","Human"),
              label = "count",) +
              theme(legend.position = "none") +
              scale_fill_gradient(low = "#F4FAFE", high = "#4981BF")
c1

Bacterial composition of each biosample(animal, human, environment) compositional bar chart

# Composition for whole community
c1 <- ggplot(Melted_Nem_hu_unrarefied_species_rel, aes(x = Biosample, fill = Class)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c1

#Composition for ASV in Enviroment and Animal

#Composition for ASV in Enviroment and Human
c2 <- ggplot(EH_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  xlab("ASV in Environment and Human only")+
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c2

#Composition for ASV in Human and Animal
c3 <- ggplot(AH_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  xlab("ASV in Human and Animal only")+
  theme(legend.text = element_text(face = "italic")) +
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c3

#Composition for ASV in Enviroment only
c4 <- ggplot(E_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  xlab("ASV in Environment only")+
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c4

#Composition for ASV in Animal only
c5 <- ggplot(A_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  xlab("ASV in Animal only")+
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c5

#Composition for ASV in Human only
c6 <- ggplot(H_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  xlab("ASV in Human only")+
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c6

#Composition for ASV in all 3 biosamples
c7 <- ggplot(all_sample, aes(x = "", fill = Genus)) + 
  geom_bar(position ="fill") + scale_colour_brewer(palette="Set1") +
  xlab("ASV appear in all biosamples")+
  ylab("Whole bacterial composition") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) + #remove the space below the 0 of the y axis in the graph
  theme_bw() +
  theme(legend.text = element_text(face = "italic")) +
  theme(axis.text.x = element_text(angle=30, colour = "black", vjust=1, hjust = 1, size=10),
        axis.ticks.x = element_blank(),
        panel.background = element_blank(), 
        panel.grid.major = element_blank(),  #remove major-grid labels
        panel.grid.minor = element_blank())  #remove minor-grid labels
c7

longitudinal paper script

qcvp

2025-01-14

Custom scripts

Scripts of `ggrare`

Library loading

Load context

Importing ASV table of Nem

Load the ASV sequences (previously created with DADA2)

Load asv tax of Nem

Import additional the metadata information of Nem

Making raw phyloseq objects

Decontamination process

Identification of contaminants - prevalence

Known kit Contaminant detection

Checking data

Calculate relative abundance

Rarefy at minimum sample size (for Alpha diversity analysis)

PHPB analysis

Introducing the human pathogens list: `PathoHuman.txt` (of Nem data)

Extract sample for longitudinal paper

Selecting the Pathogens in the datasets

Unrarefied samples for beta div

Rarefied samples for alphadiv

Bacterial composition analysis

Composition of human pathogenic ASV

longitudinal paper script

qcvp

2025-01-14

Custom scripts

Scripts of ggrare

Library loading

Load context

Importing ASV table of Nem

Load the ASV sequences (previously created with DADA2)

Load asv tax of Nem

Import additional the metadata information of Nem

Making raw phyloseq objects

Decontamination process

Identification of contaminants - prevalence

Known kit Contaminant detection

Checking data

Calculate relative abundance

Rarefy at minimum sample size (for Alpha diversity analysis)

PHPB analysis

Introducing the human pathogens list: PathoHuman.txt (of Nem data)

Extract sample for longitudinal paper

Selecting the Pathogens in the datasets

Unrarefied samples for beta div

Rarefied samples for alphadiv

Bacterial composition analysis

Composition of human pathogenic ASV

Scripts of `ggrare`

Introducing the human pathogens list: `PathoHuman.txt` (of Nem data)