Custom scripts

Before analyzing, run the following codes: ## Scripts of ggrare

ggrare <- function(physeq, step = 10, label = NULL, color = NULL,
                   plot = TRUE, parallel = FALSE, se = TRUE) {
  
  require("ggplot2")
  
  x <- as(phyloseq::otu_table(physeq), "matrix")
  if (phyloseq::taxa_are_rows(physeq)) x <- t(x)
  ## This script is adapted from vegan `rarecurve` function
  tot <- rowSums(x)
  S <- rowSums(x > 0)
  nr <- nrow(x)
  
  rarefun <- function(i) {
    cat(paste("rarefying sample", rownames(x)[i]), sep = "\n")
    n <- seq(1, tot[i], by = step)
    if (n[length(n)] != tot[i]) {
      n <- c(n, tot[i])
    }
    y <- vegan::rarefy(x[i, ,drop = FALSE], n, se = se)
    if (nrow(y) != 1) {
      rownames(y) <- c(".S", ".se")
      return(data.frame(t(y), Size = n, Sample = rownames(x)[i]))
    } else {
      return(data.frame(.S = y[1, ], Size = n, Sample = rownames(x)[i]))
    }
  }
  
  if (parallel) {
    out <- parallel::mclapply(seq_len(nr), rarefun, mc.preschedule = FALSE)
  } else {
    out <- lapply(seq_len(nr), rarefun)
  }
  
  df <- do.call(rbind, out)
  
  ## Get sample data 
  if (!is.null(phyloseq::sample_data(physeq, FALSE))) {
    sdf <- as(phyloseq::sample_data(physeq), "data.frame")
    sdf$Sample <- rownames(sdf)
    data <- merge(df, sdf, by = "Sample")
    labels <- data.frame(x = tot, y = S, Sample = rownames(x))
    labels <- merge(labels, sdf, by = "Sample")
  }
  
  ## Add, any custom-supplied plot-mapped variables
  if (length(color) > 1) {
    data$color <- color
    names(data)[names(data) == "color"] <- deparse(substitute(color))
    color <- deparse(substitute(color))
  }
  
  if (length(label) > 1) {
    labels$label <- label
    names(labels)[names(labels) == "label"] <- deparse(substitute(label))
    label <- deparse(substitute(label))
  }
  
  p <- ggplot(data = data,
              aes_string(x = "Size", y = ".S",
                         group = "Sample", color = color)) +
    labs(x = "Sample Size", y = "Species Richness")
  
  if (!is.null(label)) {
    p <- p + geom_text(data = labels,
                       aes_string(x = "x", y = "y",
                                  label = label, color = color),
                       size = 4, hjust = 0)
  }
  
  p <- p + geom_line()
  
  if (se) { ## add standard error if available
    p <- p +
      geom_ribbon(aes_string(ymin = ".S - .se", ymax = ".S + .se",
                             color = NULL, fill = color),
                  alpha = 0.2)
  }
  
  if (plot) {
    plot(p)
  }
  
  invisible(p)
  
}

#' Check for normal distribution of alpha diversity measurements
#'
#'
#' @return Print ggplot rarefaction curves
#' @param rich A dataframe with alpha diversity indices as columns
#' and samples as rows as the output of pyloseq::estimate_richness()
#' @param file Output file name

indices_normality <- function(rich, nrow, ncol) {
  
  ### p-value < 0.05 means data failed normality test
  
  par(mfrow = c(nrow, ncol))
  
  for (i in names(rich)) {
    shap <- shapiro.test(rich[, i])
    qqnorm(rich[, i], main = i, sub = shap$p.value)
    qqline(rich[, i])
  }
  
  par(mfrow = c(1, 1))
}

Script for ancombc

run_ancombc_patched <- function(
    ps,
    group,
    confounders = character(0),
    contrast = NULL,
    taxa_rank = "all",
    transform = c("identity", "log10", "log10p"),
    norm = "none",
    norm_para = list(),
    p_adjust = c(
        "none", "fdr", "bonferroni", "holm",
        "hochberg", "hommel", "BH", "BY"
    ),
    prv_cut = 0.1,
    lib_cut = 0,
    struc_zero = FALSE,
    neg_lb = FALSE,
    tol = 1e-05,
    max_iter = 100,
    conserve = FALSE,
    pvalue_cutoff = 0.05) {

    stopifnot(inherits(ps, "phyloseq"))

    ps <- microbiomeMarker:::check_rank_names(ps) |>
        microbiomeMarker:::check_taxa_rank(taxa_rank)

    if (length(confounders)) {
        confounders <- microbiomeMarker:::check_confounder(ps, group, confounders)
    }

    # if it contains missing values for any
    # variable specified in the formula, the corresponding sampling fraction
    # estimate for this sample will return NA since the sampling fraction is
    # not estimable with the presence of missing values.
    # remove this samples
    fml_char <- ifelse(
        length(confounders),
        paste(c(confounders, group), collapse = " + "),
        group
    )

    # fml_char <- paste(c(confounders, group), collapse = " + ")
    # fml <- stats::as.formula(paste("~", fml_char))
    # vars_fml <- all.vars(fml)

    for (var in c(confounders, group)) {
        ps <- microbiomeMarker:::remove_na_samples(ps, var)
    }

    # check whether group is valid, write a function
    meta <- phyloseq::sample_data(ps)

    groups <- meta[[group]]
    groups <- make.names(groups)
    if (!is.null(contrast)) {
        contrast <- make.names(contrast)
    }
    if (!is.factor(groups)) {
        groups <- factor(groups)
    }
    groups <- microbiomeMarker:::set_lvl(groups, contrast)
    phyloseq::sample_data(ps)[[group]] <- groups
    lvl <- levels(groups)
    n_lvl <- length(lvl)

    contrast <- microbiomeMarker:::check_contrast(contrast)

    transform <- match.arg(transform, c("identity", "log10", "log10p"))
    p_adjust <- match.arg(
        p_adjust,
        c(
            "none", "fdr", "bonferroni", "holm",
            "hochberg", "hommel", "BH", "BY"
        )
    )

    # set the reference level for pair-wise comparison from mutliple groups
    # if (!is.null(contrast) && n_lvl > 2) {
    #     groups <- relevel(groups, ref = contrast[1])
    #     sample_data(ps)[[group]] <- groups
    # }

    # preprocess phyloseq object
    ps <- microbiomeMarker:::preprocess_ps(ps)
    ps <- microbiomeMarker::transform_abundances(ps, transform = transform)

    # normalize the data
    norm_para <- c(norm_para, method = norm, object = list(ps))
    ps_normed <- do.call(microbiomeMarker::normalize, norm_para)
    ps_summarized <- microbiomeMarker:::pre_ps_taxa_rank(ps_normed, taxa_rank)

    global <- ifelse(n_lvl > 2, TRUE, FALSE)
    # ancombc differential abundance analysis

    if (taxa_rank == "all") {
        ancombc_taxa_rank <- phyloseq::rank_names(ps_summarized)[1]
    } else if(taxa_rank == "none") {
        ancombc_taxa_rank <- NULL
    } else {
        ancombc_taxa_rank <- taxa_rank
    }

    ancombc_out <- ANCOMBC::ancombc(
        ps_summarized,
        tax_level = ancombc_taxa_rank,
        formula = fml_char,
        p_adj_method = p_adjust,
        prv_cut = prv_cut,
        lib_cut = lib_cut,
        group = group,
        struc_zero = struc_zero,
        neg_lb = neg_lb,
        tol = tol,
        max_iter = max_iter,
        conserve = conserve,
        alpha = pvalue_cutoff,
        global = global
    )

    # multiple-group comparison will be performed while the group
    # variable has > 2 levels
    keep_var <- c("W", "p_val", "q_val", "diff_abn")
    if (n_lvl > 2) {
        # ANCOM-BC global test to determine taxa that are differentially
        # abundant between three or more groups of multiple samples.
        # global result to marker_table
        if (is.null(contrast)) {
            mtab <- ancombc_out$res_global
        } else {
            exp_lvl <- paste0(group, contrast[2])
            ancombc_res <- ancombc_out$res
            mtab <- lapply(keep_var, function(x) ancombc_res[[x]][exp_lvl])
            mtab <- do.call(cbind, mtab)
        }
    } else {
        ancombc_out_res <- ancombc_out$res
        # drop intercept
        ancombc_out_res <- lapply(
            ancombc_out_res,
            function(x) data.frame(x, row.names = "taxon")[-1])
        mtab <- do.call(
            cbind,
            ancombc_out_res[c("W", "p_val", "q_val", "diff_abn")]
        )
    }
    names(mtab) <- keep_var

    # determine enrich group based on coefficients
    # drop intercept
    cf <- data.frame(ancombc_out$res$lfc, row.names = "taxon")[-1]
    if (n_lvl > 2) {
        if (!is.null(contrast)) {
            cf <- cf[exp_lvl]
            enrich_group <- ifelse(cf[[1]] > 0, contrast[2], contrast[1])
        } else {
            cf <- cbind(0, cf)
            enrich_group <- lvl[apply(cf, 1, which.max)]
        }
    } else {
        enrich_group <- ifelse(cf[[1]] > 0, lvl[2], lvl[1])
    }

    # # enriched group
    enrich_abd <- microbiomeMarker:::get_ancombc_enrich_group(ps_summarized, ancombc_out, group)
    norm_abd <- enrich_abd$abd

    mtab <- cbind(feature = row.names(mtab), mtab, enrich_group)
    mtab_sig <- mtab[mtab$diff_abn, ]
    mtab_sig <- mtab_sig[c("feature", "enrich_group", "W", "p_val", "q_val")]
    names(mtab_sig) <- c("feature", "enrich_group", "ef_W", "pvalue", "padj")
    marker <- microbiomeMarker:::return_marker(mtab_sig, mtab)
    marker <- microbiomeMarker::microbiomeMarker(
        marker_table = marker,
        norm_method = microbiomeMarker:::get_norm_method(norm),
        diff_method = "ancombc",
        sam_data = phyloseq::sample_data(ps_summarized),
        otu_table = phyloseq::otu_table(norm_abd, taxa_are_rows = TRUE),
        tax_table = phyloseq::tax_table(ps_summarized)
    )

    marker
}

Colors list (make your own)

library(RColorBrewer)

# Define the number of colors from each palette
num_colors <- 100

custom_colors <- colorRampPalette(brewer.pal(8, "Paired"))(num_colors)

custom_colors2 <- c( "#CBD588", "#5F7FC7","#DA5724", "#508578", "#CD9BCD",
                    "#AD6F3B", "#673770","#D14285", "#652926", "#C84248", 
                    "#8569D5", "#5E738F","#D1A33D", "#8A7C64", "#599861", "black", "red", "green")

Main analysis

library("seqinr")
library(Biostrings)
library(dplyr)
library(phyloseq)
library(writexl)
library(tidyr)
library(ggplot2)
library(readr)
library(ggpubr)

Load context

Importing ASV table of Nem

asv_table <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/asv_table.tsv", sep="\t", row.names=1, header=T)

Invert the ASV table in case sample IDs are on row not column

asv_inverted<-as.data.frame(t(asv_table))

Load the ASV sequences (previously created with DADA2 {https://benjjneb.github.io/dada2/index.html})

asv_seq <- readDNAStringSet(file = "/Users/qcvp/R/outputs/dada2_total/asv_table/asv.fasta", 
                            format = "fasta",
                            nrec = -1L, 
                            skip = 0L, 
                            seek.first.rec = FALSE, 
                            use.names = TRUE)
head(asv_seq, 1)
## DNAStringSet object of length 1:
##     width seq                                               names               
## [1]   370 TACGGAAGGTCCGGGCGTTATCC...TGGGGAGTACGCCGGCAACGGTG ASV1

Load asv tax of Nem

asv_tax <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/taxonomy.tsv", header = TRUE, row.names = 1, sep="\t")
asv_tax <- as.matrix(asv_tax)

head(asv_tax, 1)
##      Kingdom    Phylum         Class         Order           Family          
## ASV1 "Bacteria" "Bacteroidota" "Bacteroidia" "Bacteroidales" "Prevotellaceae"
##      Genus         
## ASV1 "Prevotella_9"

Import additional the metadata information of Nem

context <- read.table("/Users/qcvp/R/outputs/dada2_total/asv_table/metadata.txt", header = TRUE, row.names = 1, sep="\t")
rownames(context) <- gsub("-",".",rownames(context))
head(context)
##       Sequencing     Date Transect Station   Biosample Biosample_type
## Bds1b  Barcoding Nov_2022 Bich_Dam      BD Environment       Sediment
## Bds2b  Barcoding Nov_2022 Bich_Dam      BD Environment       Sediment
## Bds3b  Barcoding Nov_2022 Bich_Dam      BD Environment       Sediment
## Bdw1b  Barcoding Nov_2022 Bich_Dam      BD Environment          Water
## Bdw2b  Barcoding Nov_2022 Bich_Dam      BD Environment          Water
## Bdw3b  Barcoding Nov_2022 Bich_Dam      BD Environment          Water
##       Biosample_cluster   Host_type Host_subtype Host_genus Host_species
## Bds1b          Sediment Environment     Sediment       <NA>         <NA>
## Bds2b          Sediment Environment     Sediment       <NA>         <NA>
## Bds3b          Sediment Environment     Sediment       <NA>         <NA>
## Bdw1b             Water Environment        Water       <NA>         <NA>
## Bdw2b             Water Environment        Water       <NA>         <NA>
## Bdw3b             Water Environment        Water       <NA>         <NA>
##       Species_ID
## Bds1b       <NA>
## Bds2b       <NA>
## Bds3b       <NA>
## Bdw1b       <NA>
## Bdw2b       <NA>
## Bdw3b       <NA>

Checking samples IDs uniformity

setdiff(x = row.names(asv_inverted), y = row.names(context)) 
## character(0)
#(result = 0 : IDs are uniform within the data)

Making raw phyloseq objects

physeq_Nem_transect <- phyloseq(
  otu_table(asv_inverted, taxa_are_rows = F), #if you have taxa on rows = T
  sample_data(context),
  refseq(asv_seq),
  tax_table(asv_tax)
)
dim(physeq_Nem_transect@otu_table)
## [1]    412 111454

Remove MOCK

Mock-community composition

physeq_mock <- subset_samples(physeq_Nem_transect, Biosample_type=="Mock")

Filter the genus taxonomy in Mock samples -> meaning of this step

physeq_phylum_mock <- physeq_mock %>%
  tax_glom(taxrank = "Genus") %>%                         # agglomerate at Class level
  transform_sample_counts(function(x) {x/sum(x)} ) %>%    # Transform to rel. abundance
  psmelt() %>%                                            # Melt to long format
  arrange(Genus) %>%                                      # Sort data frame alphabetically by phylum
  filter(Genus %in% c("Listeria", "Pseudomonas", "Bacillus", "Saccharomyces", 
                      "Escherichia", "Salmonella", "Lactobacillus", 
                      "Enterococcus","Cryptococcus", "Staphylococcus" ))

Making Treemap of the whole Mock composition

library("treemap")

treemap(physeq_phylum_mock, index=c("Family", "Genus"), 
        vSize="Abundance", type="index",
        fontsize.labels=c(15,12),                
        fontcolor.labels=c("white","black"),     
        fontface.labels=c(2,1),                  
        bg.labels=c("transparent"),              
        align.labels=list(
          c("center", "center"), 
          c("left", "bottom")),                  
        overlap.labels=0.5,                      
        inflate.labels=F,                        
        palette = "Set1",                        
        fontsize.title=12)

Barplot of Mock composition

library(ggplot2)
ggplot(physeq_phylum_mock, aes(x = Sample, y = Abundance, fill = Genus)) + 
  geom_bar(stat = "identity") + 
  ylab("Relative Abundance (Class > 2%)") +
  scale_y_continuous(expand = c(0,0), labels = scales::percent_format(scale = 100)) #remove the space below the 0 of the y axis in the graph

Decontamination process

Remove MOCK from the phyloseq object

physeq_Nem_transect <- subset_samples(physeq_Nem_transect, !Biosample_type =="Mock")

dim(physeq_Nem_transect@otu_table)
## [1]    410 111454

Quick checking library size of each Biomsaple types

df <- as.data.frame(sample_data(physeq_Nem_transect))  # Put sample_data into a ggplot-friendly data.frame
df$LibrarySize <- sample_sums(physeq_Nem_transect)
df <- df[order(df$LibrarySize),]
df$Index <- seq(nrow(df))
ggplot(data=df, aes(x=Index, y=LibrarySize, color=as.factor(Biosample))) + 
  geom_point() + geom_text(aes(label=Index), hjust=2, nudge_y = 0.05)

Identification of contaminants - prevalence

sample_data(physeq_Nem_transect)$is.neg <- sample_data(physeq_Nem_transect)$Biosample == "Control"
library(decontam)
contam_df_prev05 <- isContaminant(physeq_Nem_transect, method="prevalence", neg="is.neg", threshold=0.5)
table(contam_df_prev05$contaminant)
## 
##  FALSE   TRUE 
## 111115    339

List of contaminant

which(contam_df_prev05$contaminant)
##   [1]     2     4     8    10    21    24    29    34    35    36    37    48
##  [13]    49    52    54    61    70    72    73    87    93    97   102   107
##  [25]   121   128   130   138   140   142   143   149   168   171   172   176
##  [37]   183   191   195   204   228   229   248   272   282   284   290   293
##  [49]   301   306   309   311   318   334   350   353   355   356   376   380
##  [61]   381   382   383   391   401   405   406   412   430   436   442   445
##  [73]   456   468   471   481   490   509   520   521   522   558   570   577
##  [85]   581   591   621   642   671   674   704   706   708   712   722   724
##  [97]   726   730   743   744   747   761   762   776   795   798   799   802
## [109]   816   820   824   830   844   862   871   904   909   914   915   949
## [121]   965   975   991   996  1000  1017  1034  1056  1101  1104  1105  1116
## [133]  1126  1127  1146  1151  1190  1198  1215  1217  1224  1280  1281  1284
## [145]  1295  1301  1302  1332  1347  1348  1349  1367  1376  1391  1405  1462
## [157]  1464  1472  1534  1598  1607  1613  1640  1642  1666  1699  1711  1752
## [169]  1758  1776  1798  1816  1850  1859  1884  1915  1918  1919  1930  1952
## [181]  1990  1991  2069  2129  2137  2144  2178  2233  2274  2289  2321  2355
## [193]  2360  2366  2390  2397  2404  2419  2427  2515  2530  2583  2595  2596
## [205]  2601  2641  2647  2694  2713  2763  2892  2924  2983  2995  3007  3041
## [217]  3052  3079  3178  3205  3213  3215  3228  3302  3414  3415  3485  3517
## [229]  3520  3564  3580  3598  3676  3747  3831  3855  3869  3871  3902  3922
## [241]  3967  3970  4028  4125  4134  4144  4150  4442  4453  4548  4626  4657
## [253]  4658  4710  4859  4905  4934  5066  5155  5163  5223  5244  5327  5333
## [265]  5384  5403  5538  5574  5621  5650  5767  5932  5981  6077  6103  6177
## [277]  6265  6276  6296  6337  6962  7029  7101  7144  7382  7548  7592  7645
## [289]  7647  7652  7808  7848  8222  8329  8389  8666  8947  8991  9080  9305
## [301]  9471  9643  9739  9969 10144 10273 10372 10384 10513 10599 10706 10956
## [313] 11190 11359 11902 12278 12435 12588 13964 14121 14642 15508 16642 17814
## [325] 19218 20331 21825 22059 24068 24298 24625 24837 26448 27917 29044 30253
## [337] 36085 46475 64190
row_indices <- which(contam_df_prev05$contaminant) 
tax <- data.frame(physeq_Nem_transect@tax_table)
Contam_taxonomy_table <- tibble()

for (i in row_indices){
  loc <-  contam_df_prev05[i, 0]
  tax_key <- row.names(loc)
  tax_value <- tax[tax_key, ]
  Contam_taxonomy_table <- rbind(Contam_taxonomy_table, tax_value)
}

head(Contam_taxonomy_table)
##        Kingdom         Phylum               Class
## ASV2  Bacteria Proteobacteria Alphaproteobacteria
## ASV4  Bacteria   Bacteroidota         Bacteroidia
## ASV8  Bacteria     Firmicutes             Bacilli
## ASV10 Bacteria     Firmicutes             Bacilli
## ASV21 Bacteria     Firmicutes          Clostridia
## ASV24 Bacteria     Firmicutes          Clostridia
##                                     Order                Family
## ASV2                          Rhizobiales                  <NA>
## ASV4                        Bacteroidales        Prevotellaceae
## ASV8                      Lactobacillales          Listeriaceae
## ASV10                    Staphylococcales     Staphylococcaceae
## ASV21                       Clostridiales        Clostridiaceae
## ASV24 Peptostreptococcales-Tissierellales Peptostreptococcaceae
##                             Genus
## ASV2                         <NA>
## ASV4                 Prevotella_9
## ASV8                     Listeria
## ASV10              Staphylococcus
## ASV21 Clostridium sensu stricto 4
## ASV24             Paraclostridium

Remaking phyloseq object of presence-absence in negative controls and true samples

ps.pa <- transform_sample_counts(physeq_Nem_transect, function(abund) 1*(abund>0))
ps.pa.negative <- prune_samples(sample_data(ps.pa)$Biosample_type == "Negative", ps.pa)
ps.pa.positive <- prune_samples(sample_data(ps.pa)$Biosample_type != "Negative", ps.pa)

Make a data.frame of prevalence in positive and negative samples

df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.positive), pa.neg=taxa_sums(ps.pa.negative),
                    contaminant=contam_df_prev05$contaminant)
ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant)) + 
  geom_point() +
  xlab("Prevalence (Negative Controls)") + 
  ylab("Prevalence (True Samples)")

Known kit Contaminant detection

We use the Known kit contaminant that contains several genera of bacteria

contaminant_genera <- read.table("/Users/qcvp/R/Contaminant.txt", header = T)
contam_vec <- which (Contam_taxonomy_table[,6] %in% contaminant_genera$contaminant) 
#to be removed: Cupriavidus, Bradyrhizobium, Pelomonas)
contam_ASV <- Contam_taxonomy_table[contam_vec,]
contam_ASV_vec = row.names(contam_ASV)

badTaxa = contam_ASV_vec
allTaxa = taxa_names(physeq_Nem_transect)
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
physeq_Nem_transect = prune_taxa(myTaxa, physeq_Nem_transect)
rowSums(physeq_Nem_transect@otu_table@.Data)
##        BD.S.1.M        BD.S.2.M        BD.S.3.M        BD.S.4.B        BD.S.5.B 
##           17384           20729           20081           24016           24748 
##        BD.S.6.B        BD.W.1.M        BD.W.2.M        BD.W.3.M        BD.W.4.B 
##           32220           33322           45583           40746           42418 
##        BD.W.5.B        BD.W.6.B           Bds1b           Bds2b           Bds3b 
##           39323           49036           39896           49676           37365 
##           Bdw1b           Bdw2b           Bdw3b    BioFilm.21.1    BioFilm.21.2 
##           59577           52525           55784           42785           44011 
##    BioFilm.21.3        CR.S.1.M        CR.S.2.M        CR.S.3.M        CR.S.4.B 
##           43184           24922           21449           22532           23224 
##        CR.S.5.B        CR.S.6.B        CR.W.1.M        CR.W.2.M        CR.W.3.M 
##           15892           24986           34954           33516           34999 
##        CR.W.4.B        CR.W.5.B        CR.W.6.B           F1s1b           F1s2b 
##           37623           42641           42044           46340           37233 
##           F1s3b           F1w1b           F1w2b           F1w3b           F2s1b 
##           39302           53450           52458           46446           69226 
##           F2s2b           F2s3b           F2w1b           F2w2b           F2w3b 
##           24993           46974           78105           51493           54343 
##           F3s1b           F3s2b           F3s3b           F3w1b           F3w2b 
##           63068           26605           18960           52664           57871 
##           F3w3b           F4s1b           F4s2b           F4s3b           F4w1b 
##           59711           45102           39027           38422           49239 
##           F4w2b           F4w3b      Fi.10.3.GM      Fi.10.4.GB      Fi.10.5.GB 
##           54946           49988           38667           45102           62324 
##      Fi.10.6.GB     Fi.12.10.GB     Fi.12.11.GB     Fi.12.13.GB      Fi.12.4.GB 
##           44576           48504           47979           47759           41067 
##      Fi.12.5.GB      Fi.12.6.GB      Fi.12.7.GB      Fi.12.8.GB      Fi.12.9.GB 
##           34256           46838           37780           27074           43780 
##      Fi.13.1.GM      Fi.13.2.GB      Fi.13.3.GB      Fi.14.1.GM      Fi.14.2.GM 
##           34618           47340           63412              48           58342 
##      Fi.14.5.GB      Fi.14.6.GB      Fi.17.8.GM      Fi.18.1.GB      Fi.18.2.GB 
##           30505           38585            4190           57713           76741 
##      Fi.18.3.GB      Fi.19.1.GB      Fi.20.1.GB      Fi.20.2.GB       Fi.3.6.GB 
##           65929           52368           53187           39624           55979 
##       Fi.3.7.GB       Fi.4.1.GB       Fi.4.3.GM       Fi.4.4.GB       Fi.5.3.GB 
##           54735              24               0           12667           57701 
##       Fi.5.4.GB       Fi.5.5.GB       Fi.6.4.GB       Fi.6.5.GB       Fi.6.6.GM 
##           36549           55675           68344           15672           33512 
##       Fi.6.7.GB       Fi.6.8.GB       Fi.6.9.GB        Fi10.1gb        Fi10.2gb 
##           49797           46385           51144           32550           31806 
##        Fi11.1gb        Fi11.2gb        Fi12.1gb        Fi12.2gb        Fi12.3gb 
##           55545           63243           24026           33020            6767 
##         Fi3.1gb         Fi3.2gb         Fi3.3gb         Fi3.4gb         Fi3.5gb 
##           38892           48920           74022           71343           19574 
##         Fi5.1gb         Fi5.2gb         Fi6.1gb         Fi6.2gb         Fi6.3gb 
##           16770           28877           67338           80000           66382 
##         Fi7.1gb         Fi7.2gb         Fi7.3gb         Fi7.4gb         Fi7.5gb 
##           53019           62106           80805           40597           11573 
##      Fo.10.3.BM      Fo.10.4.BM      Fo.15.1.BM      Fo.15.2.BM      Fo.15.3.BM 
##             494             129            4237            6953            1203 
##      Fo.17.1.BM      Fo.17.2.BM      Fo.17.3.BM      Fo.17.5.BM       Fo.4.1.BM 
##            1300            1188             802           46665           43909 
##       Fo.4.2.BM       Fo.4.3.BM       Fo.4.4.BM       Fo.4.5.BM        HM.S.1.M 
##           44995           47057           50131           46596           17413 
##        HM.S.2.M        HM.S.3.M        HM.S.4.B        HM.S.5.B        HM.S.6.B 
##           27386           30272           26399           21458           28357 
##        HM.W.1.M        HM.W.2.M        HM.W.3.M        HM.W.4.B        HM.W.5.B 
##           47665           41343           36076           41295           39602 
##        HM.W.6.B        Hu.18.GM        Hu.18.SM        Hu.19.GM        Hu.19.SM 
##           45497           41400           39107           37295           50870 
##        Hu.20.GM        Hu.20.SM        Hu.21.GM        Hu.21.SM        Hu.22.GM 
##           38696           48402           45042           60192           43200 
##        Hu.22.SM        Hu.23.GM        Hu.23.SM        Hu.25.GM        Hu.25.SM 
##           34622           37508           40087           42442           41232 
##        Hu.26.GM        Hu.26.SM        Hu.27.GM        Hu.27.SM        Hu.28.GM 
##           52161           56539           39403           44690           40056 
##        Hu.28.SM        Hu.29.GM        Hu.29.SM        Hu.30.GM        Hu.30.SM 
##           71235           54112           48482           48293           44349 
##        Hu.31.GM        Hu.31.SM        Hu.32.GM        Hu.32.SM        Hu.33.GM 
##           50562           16459           43063             132           60630 
##        Hu.33.SM        Hu.34.GM        Hu.34.SM        Hu.35.GM        Hu.35.SM 
##              67           33303              65           46535               6 
##        Hu.36.GM        Hu.36.SM        Hu.37.GM        Hu.37.SM        Hu.38.GM 
##           32848           48320           42566           10729           42762 
##        Hu.38.SM        Hu.39.GM        Hu.39.SM        Hu.40.SM        Hu.41.GM 
##           43129           79558           19260              32           33216 
##        Hu.41.SM        Hu.43.GM        Hu.43.SM        Hu.44.GM        Hu.44.SM 
##           46938           36052           44913           31070           46611 
##        Hu.45.GB        Hu.45.SB        Hu.46.GB        Hu.46.SB        Hu.47.GB 
##           52692           59655           54423           63010           45657 
##        Hu.47.SB        Hu.48.GB        Hu.48.SB        Hu.49.GB        Hu.49.SB 
##           53277           41631           42073           40763           44233 
##        Hu.50.GB        Hu.50.SB        Hu.51.GB        Hu.51.SB        Hu.52.GB 
##           46471           49526           41804           51318           50735 
##        Hu.52.SB        Hu.53.GB        Hu.53.SB        Hu.54.GB        Hu.54.SB 
##           48581           54047           41886           42380           46406 
##        Hu.55.GB        Hu.55.SB        Hu.56.GB        Hu.56.SB        Hu.57.GB 
##           48470           49217           42845           46861           46429 
##        Hu.57.SB        Hu.58.GB        Hu.58.SB        Hu.59.GB        Hu.59.SB 
##           50859           45358           53071           37581           49169 
##        Hu.60.GB        Hu.60.SB        Hu.61.GB        Hu.61.SB        Hu.63.GB 
##           32066           54238           40039           31009           37881 
##        Hu.63.SB        Hu.64.GB        Hu.64.SB        Hu.66.GB        Hu.66.SB 
##           51367           43374           60904           44264           63190 
##        Hu.67.GB        Hu.67.SB        Hu.68.GB        Hu.68.SB        Hu.69.GB 
##           45774           54810           46395           54079           49964 
##        Hu.69.SB        Hu.70.GB        Hu.70.SB        Hu.71.GB        Hu.71.SB 
##           44318           37418           42300           40912           52890 
##   Hu10.1.biof.6        Hu10.1gb        Hu10.1sb   Hu10.2.biof.6   Hu10.3.biof.6 
##           39421           54265           69275           44557           54545 
##   Hu11.1.biof.7        Hu11.1gb        Hu11.1sb   Hu11.2.biof.7   Hu11.3.biof.7 
##           67304           41851           46258           52496           60087 
##   Hu12.1.biof.8        Hu12.1gb        Hu12.1sb   Hu12.2.biof.8   Hu12.3.biof.8 
##           64061           36465           44376           40395           42529 
##   Hu13.1.biof.9        Hu13.1gb        Hu13.1sb   Hu13.2.biof.9   Hu13.3.biof.9 
##           67256           49820           63817           37187           34213 
##        Hu14.1gb        Hu14.1sb  Hu15.1.biof.11  Hu15.2.biof.11  Hu15.3.biof.11 
##           48048           50917           53169           40287           43736 
##        Hu15.3gb        Hu15.3sb  Hu16.1.biof.12  Hu16.2.biof.12  Hu16.3.biof.12 
##           58690           52095           70632           22943           41331 
##        Hu16.3gb        Hu16.3sb        Hu17.3gb        Hu17.3sb         Hu3.1gb 
##           28483           60154           40364           52663           48364 
##         Hu3.1sb         Hu4.1gb         Hu4.1sb    Hu5.1.biof.3         Hu5.1gb 
##           67419           40637           71386           71724           38520 
##         Hu5.1sb    Hu5.2.biof.3    Hu5.3.biof.3         Hu6.1gb         Hu6.1sb 
##           57928           35086           40797           44588           58763 
##    Hu7.1.biof.4         Hu7.1gb         Hu7.1sb    Hu7.2.biof.4    Hu7.3.biof.4 
##           61112           39896           51487           35515           33193 
##    Hu8.1.biof.5         Hu8.1gb         Hu8.1sb    Hu8.2.biof.5    Hu8.3.biof.5 
##           53698           41631           72114           55398           62447 
##         Hu9.1gb         Hu9.1sb       Lo.2.4.GB       Lo.2.5.GB       Lo.2.6.GB 
##           51214           53181           69876           60560           73771 
##       Lo.2.7.GB       Lo.2.8.GB       Lo.2.9.GB       Lo.5.1.GM       Lo.5.3.GB 
##           69345           69516           40094           55895           61487 
##       Lo.5.4.GB       Lo.5.5.GB       Lo.9.1.GB       Lo.9.2.GB         Lo2.1gb 
##           64312           67930           42361           59388           72620 
##         Lo2.2gb         Lo2.3gb         Lo4.1gb         Lo4.2gb         Lo8.1gb 
##           53676           54559           59000           78560           49743 
##         Lo8.2gb         Lo8.3gb         Lo8.4gb         Lo8.5gb  Nega.control.3 
##           33285           51387           63526           36797           78845 
## Nega.control.3b  Nega.control.4        NP.S.1.M        NP.S.2.M        NP.S.3.M 
##           23451           37487           24387           20484           23028 
##        NP.S.4.B        NP.S.5.B        NP.S.6.B        NP.W.1.M        NP.W.2.M 
##           20495           26227           22777           38950           25978 
##        NP.W.3.M        NP.W.4.B        NP.W.5.B        NP.W.6.B      Oy.13.2.GB 
##           28335           36448           34863           32156            1003 
##      Oy.13.3.GM      Oy.13.7.GB      Oy.15.4.GM      Oy.15.5.GM      Oy.15.6.GB 
##              32           27640           33788           16113           20909 
##      Oy.15.7.GB      Oy.15.8.GB    PCR.caouanem    PCR.p1.dec23    PCR.p1.jul23 
##            4651           46010              49           49092           43541 
##    PCR.p2.dec23           R1s1b           R1s2b           R1s3b           R1w1b 
##           26122           28579           31247           31796           44413 
##           R1w2b           R1w3b           R2s1b           R2s2b           R2s3b 
##           46360           41145           32331           34914           24316 
##           R2w1b           R2w2b           R2w3b           R3s1b           R3s2b 
##           53480           49822           58653           61408           17070 
##           R3s3b           R3w1b           R3w2b           R3w3b           R4s1b 
##           20450           53804           46279           54693           20098 
##           R4s2b           R4s3b           R4w1b           R4w2b           R4w3b 
##           22962           27924           50250           45740           74869 
##          Rivs1b          Rivs2b          Rivs3b          Rivw1b          Rivw2b 
##           42887           63430           51925           52235           42333 
##          Rivw3b      Sq.11.1.GM      Sq.11.2.GM      Sq.11.3.GB      Sq.11.4.GB 
##           48254           55471           44175              20           72909 
##      Sq.11.5.GB           U1s1b           U1s2b           U1s3b           U1w1b 
##           61280           40996           43815           29806           40347 
##           U1w2b           U1w3b           U2s1b           U2s2b           U2s3b 
##           39343           40884           25676           41921           31856 
##           U2w1b           U2w2b           U2w3b           U3s1b           U3s2b 
##           35068           77228           30097           24109           21444 
##           U3s3b           U3w1b           U3w2b           U3w3b           U4s1b 
##           31839           45735           43213           44819           30082 
##           U4s2b           U4s3b           U4w1b           U4w2b           U4w3b 
##           29571           59626           68842           47283           42721

We also want to remove ASVs accounting for the 2 genera “Cupriavidus” and “Bradyrhizobium” that are know to be absent in marine environments and Listeria from MOCK

physeq_Nem_transect <- subset_taxa(physeq_Nem_transect, !Genus %in% c("Cupriavidus", "Bradyrhizobium", "Listeria"))
physeq_Nem_transect
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 111357 taxa and 410 samples ]
## sample_data() Sample Data:       [ 410 samples by 13 sample variables ]
## tax_table()   Taxonomy Table:    [ 111357 taxa by 6 taxonomic ranks ]
## refseq()      DNAStringSet:      [ 111357 reference sequences ]

Remove MOCK and control samples

physeq_Nem_transect <- subset_samples(physeq_Nem_transect, Biosample != "Control") 

dim(physeq_Nem_transect@otu_table)
## [1]    403 111357

Checking data

#Tax 
whole_bacteria_ASV_TAX <- data.frame(physeq_Nem_transect@tax_table)
ID <- rownames(whole_bacteria_ASV_TAX)
whole_bacteria_ASV_TAX <- cbind(ID=ID, whole_bacteria_ASV_TAX)

#Dataset with Relative abundance
physeq_Nem_transect_rel <- transform_sample_counts(physeq_Nem_transect, function(x) x / sum(x) )
rowSums(physeq_Nem_transect_rel@otu_table@.Data)
##       BD.S.1.M       BD.S.2.M       BD.S.3.M       BD.S.4.B       BD.S.5.B 
##              1              1              1              1              1 
##       BD.S.6.B       BD.W.1.M       BD.W.2.M       BD.W.3.M       BD.W.4.B 
##              1              1              1              1              1 
##       BD.W.5.B       BD.W.6.B          Bds1b          Bds2b          Bds3b 
##              1              1              1              1              1 
##          Bdw1b          Bdw2b          Bdw3b   BioFilm.21.1   BioFilm.21.2 
##              1              1              1              1              1 
##   BioFilm.21.3       CR.S.1.M       CR.S.2.M       CR.S.3.M       CR.S.4.B 
##              1              1              1              1              1 
##       CR.S.5.B       CR.S.6.B       CR.W.1.M       CR.W.2.M       CR.W.3.M 
##              1              1              1              1              1 
##       CR.W.4.B       CR.W.5.B       CR.W.6.B          F1s1b          F1s2b 
##              1              1              1              1              1 
##          F1s3b          F1w1b          F1w2b          F1w3b          F2s1b 
##              1              1              1              1              1 
##          F2s2b          F2s3b          F2w1b          F2w2b          F2w3b 
##              1              1              1              1              1 
##          F3s1b          F3s2b          F3s3b          F3w1b          F3w2b 
##              1              1              1              1              1 
##          F3w3b          F4s1b          F4s2b          F4s3b          F4w1b 
##              1              1              1              1              1 
##          F4w2b          F4w3b     Fi.10.3.GM     Fi.10.4.GB     Fi.10.5.GB 
##              1              1              1              1              1 
##     Fi.10.6.GB    Fi.12.10.GB    Fi.12.11.GB    Fi.12.13.GB     Fi.12.4.GB 
##              1              1              1              1              1 
##     Fi.12.5.GB     Fi.12.6.GB     Fi.12.7.GB     Fi.12.8.GB     Fi.12.9.GB 
##              1              1              1              1              1 
##     Fi.13.1.GM     Fi.13.2.GB     Fi.13.3.GB     Fi.14.1.GM     Fi.14.2.GM 
##              1              1              1              1              1 
##     Fi.14.5.GB     Fi.14.6.GB     Fi.17.8.GM     Fi.18.1.GB     Fi.18.2.GB 
##              1              1              1              1              1 
##     Fi.18.3.GB     Fi.19.1.GB     Fi.20.1.GB     Fi.20.2.GB      Fi.3.6.GB 
##              1              1              1              1              1 
##      Fi.3.7.GB      Fi.4.1.GB      Fi.4.3.GM      Fi.4.4.GB      Fi.5.3.GB 
##              1              1            NaN              1              1 
##      Fi.5.4.GB      Fi.5.5.GB      Fi.6.4.GB      Fi.6.5.GB      Fi.6.6.GM 
##              1              1              1              1              1 
##      Fi.6.7.GB      Fi.6.8.GB      Fi.6.9.GB       Fi10.1gb       Fi10.2gb 
##              1              1              1              1              1 
##       Fi11.1gb       Fi11.2gb       Fi12.1gb       Fi12.2gb       Fi12.3gb 
##              1              1              1              1              1 
##        Fi3.1gb        Fi3.2gb        Fi3.3gb        Fi3.4gb        Fi3.5gb 
##              1              1              1              1              1 
##        Fi5.1gb        Fi5.2gb        Fi6.1gb        Fi6.2gb        Fi6.3gb 
##              1              1              1              1              1 
##        Fi7.1gb        Fi7.2gb        Fi7.3gb        Fi7.4gb        Fi7.5gb 
##              1              1              1              1              1 
##     Fo.10.3.BM     Fo.10.4.BM     Fo.15.1.BM     Fo.15.2.BM     Fo.15.3.BM 
##              1              1              1              1              1 
##     Fo.17.1.BM     Fo.17.2.BM     Fo.17.3.BM     Fo.17.5.BM      Fo.4.1.BM 
##              1              1              1              1              1 
##      Fo.4.2.BM      Fo.4.3.BM      Fo.4.4.BM      Fo.4.5.BM       HM.S.1.M 
##              1              1              1              1              1 
##       HM.S.2.M       HM.S.3.M       HM.S.4.B       HM.S.5.B       HM.S.6.B 
##              1              1              1              1              1 
##       HM.W.1.M       HM.W.2.M       HM.W.3.M       HM.W.4.B       HM.W.5.B 
##              1              1              1              1              1 
##       HM.W.6.B       Hu.18.GM       Hu.18.SM       Hu.19.GM       Hu.19.SM 
##              1              1              1              1              1 
##       Hu.20.GM       Hu.20.SM       Hu.21.GM       Hu.21.SM       Hu.22.GM 
##              1              1              1              1              1 
##       Hu.22.SM       Hu.23.GM       Hu.23.SM       Hu.25.GM       Hu.25.SM 
##              1              1              1              1              1 
##       Hu.26.GM       Hu.26.SM       Hu.27.GM       Hu.27.SM       Hu.28.GM 
##              1              1              1              1              1 
##       Hu.28.SM       Hu.29.GM       Hu.29.SM       Hu.30.GM       Hu.30.SM 
##              1              1              1              1              1 
##       Hu.31.GM       Hu.31.SM       Hu.32.GM       Hu.32.SM       Hu.33.GM 
##              1              1              1              1              1 
##       Hu.33.SM       Hu.34.GM       Hu.34.SM       Hu.35.GM       Hu.35.SM 
##              1              1              1              1              1 
##       Hu.36.GM       Hu.36.SM       Hu.37.GM       Hu.37.SM       Hu.38.GM 
##              1              1              1              1              1 
##       Hu.38.SM       Hu.39.GM       Hu.39.SM       Hu.40.SM       Hu.41.GM 
##              1              1              1              1              1 
##       Hu.41.SM       Hu.43.GM       Hu.43.SM       Hu.44.GM       Hu.44.SM 
##              1              1              1              1              1 
##       Hu.45.GB       Hu.45.SB       Hu.46.GB       Hu.46.SB       Hu.47.GB 
##              1              1              1              1              1 
##       Hu.47.SB       Hu.48.GB       Hu.48.SB       Hu.49.GB       Hu.49.SB 
##              1              1              1              1              1 
##       Hu.50.GB       Hu.50.SB       Hu.51.GB       Hu.51.SB       Hu.52.GB 
##              1              1              1              1              1 
##       Hu.52.SB       Hu.53.GB       Hu.53.SB       Hu.54.GB       Hu.54.SB 
##              1              1              1              1              1 
##       Hu.55.GB       Hu.55.SB       Hu.56.GB       Hu.56.SB       Hu.57.GB 
##              1              1              1              1              1 
##       Hu.57.SB       Hu.58.GB       Hu.58.SB       Hu.59.GB       Hu.59.SB 
##              1              1              1              1              1 
##       Hu.60.GB       Hu.60.SB       Hu.61.GB       Hu.61.SB       Hu.63.GB 
##              1              1              1              1              1 
##       Hu.63.SB       Hu.64.GB       Hu.64.SB       Hu.66.GB       Hu.66.SB 
##              1              1              1              1              1 
##       Hu.67.GB       Hu.67.SB       Hu.68.GB       Hu.68.SB       Hu.69.GB 
##              1              1              1              1              1 
##       Hu.69.SB       Hu.70.GB       Hu.70.SB       Hu.71.GB       Hu.71.SB 
##              1              1              1              1              1 
##  Hu10.1.biof.6       Hu10.1gb       Hu10.1sb  Hu10.2.biof.6  Hu10.3.biof.6 
##              1              1              1              1              1 
##  Hu11.1.biof.7       Hu11.1gb       Hu11.1sb  Hu11.2.biof.7  Hu11.3.biof.7 
##              1              1              1              1              1 
##  Hu12.1.biof.8       Hu12.1gb       Hu12.1sb  Hu12.2.biof.8  Hu12.3.biof.8 
##              1              1              1              1              1 
##  Hu13.1.biof.9       Hu13.1gb       Hu13.1sb  Hu13.2.biof.9  Hu13.3.biof.9 
##              1              1              1              1              1 
##       Hu14.1gb       Hu14.1sb Hu15.1.biof.11 Hu15.2.biof.11 Hu15.3.biof.11 
##              1              1              1              1              1 
##       Hu15.3gb       Hu15.3sb Hu16.1.biof.12 Hu16.2.biof.12 Hu16.3.biof.12 
##              1              1              1              1              1 
##       Hu16.3gb       Hu16.3sb       Hu17.3gb       Hu17.3sb        Hu3.1gb 
##              1              1              1              1              1 
##        Hu3.1sb        Hu4.1gb        Hu4.1sb   Hu5.1.biof.3        Hu5.1gb 
##              1              1              1              1              1 
##        Hu5.1sb   Hu5.2.biof.3   Hu5.3.biof.3        Hu6.1gb        Hu6.1sb 
##              1              1              1              1              1 
##   Hu7.1.biof.4        Hu7.1gb        Hu7.1sb   Hu7.2.biof.4   Hu7.3.biof.4 
##              1              1              1              1              1 
##   Hu8.1.biof.5        Hu8.1gb        Hu8.1sb   Hu8.2.biof.5   Hu8.3.biof.5 
##              1              1              1              1              1 
##        Hu9.1gb        Hu9.1sb      Lo.2.4.GB      Lo.2.5.GB      Lo.2.6.GB 
##              1              1              1              1              1 
##      Lo.2.7.GB      Lo.2.8.GB      Lo.2.9.GB      Lo.5.1.GM      Lo.5.3.GB 
##              1              1              1              1              1 
##      Lo.5.4.GB      Lo.5.5.GB      Lo.9.1.GB      Lo.9.2.GB        Lo2.1gb 
##              1              1              1              1              1 
##        Lo2.2gb        Lo2.3gb        Lo4.1gb        Lo4.2gb        Lo8.1gb 
##              1              1              1              1              1 
##        Lo8.2gb        Lo8.3gb        Lo8.4gb        Lo8.5gb       NP.S.1.M 
##              1              1              1              1              1 
##       NP.S.2.M       NP.S.3.M       NP.S.4.B       NP.S.5.B       NP.S.6.B 
##              1              1              1              1              1 
##       NP.W.1.M       NP.W.2.M       NP.W.3.M       NP.W.4.B       NP.W.5.B 
##              1              1              1              1              1 
##       NP.W.6.B     Oy.13.2.GB     Oy.13.3.GM     Oy.13.7.GB     Oy.15.4.GM 
##              1              1              1              1              1 
##     Oy.15.5.GM     Oy.15.6.GB     Oy.15.7.GB     Oy.15.8.GB          R1s1b 
##              1              1              1              1              1 
##          R1s2b          R1s3b          R1w1b          R1w2b          R1w3b 
##              1              1              1              1              1 
##          R2s1b          R2s2b          R2s3b          R2w1b          R2w2b 
##              1              1              1              1              1 
##          R2w3b          R3s1b          R3s2b          R3s3b          R3w1b 
##              1              1              1              1              1 
##          R3w2b          R3w3b          R4s1b          R4s2b          R4s3b 
##              1              1              1              1              1 
##          R4w1b          R4w2b          R4w3b         Rivs1b         Rivs2b 
##              1              1              1              1              1 
##         Rivs3b         Rivw1b         Rivw2b         Rivw3b     Sq.11.1.GM 
##              1              1              1              1              1 
##     Sq.11.2.GM     Sq.11.3.GB     Sq.11.4.GB     Sq.11.5.GB          U1s1b 
##              1              1              1              1              1 
##          U1s2b          U1s3b          U1w1b          U1w2b          U1w3b 
##              1              1              1              1              1 
##          U2s1b          U2s2b          U2s3b          U2w1b          U2w2b 
##              1              1              1              1              1 
##          U2w3b          U3s1b          U3s2b          U3s3b          U3w1b 
##              1              1              1              1              1 
##          U3w2b          U3w3b          U4s1b          U4s2b          U4s3b 
##              1              1              1              1              1 
##          U4w1b          U4w2b          U4w3b 
##              1              1              1
max(rowSums(physeq_Nem_transect@otu_table))
## [1] 80805
min(rowSums(physeq_Nem_transect@otu_table))
## [1] 0
mean(rowSums(physeq_Nem_transect@otu_table))
## [1] 42461.69