Metabarcoding: bacterial dynamics in the GoA 2020-22

~~~

phyloseq analysis of 16S

Samples were sequenced and prepared beforehand with Qiime2. This project begins after obtaining 3 files for phyloseq analysis:

metadata
OTU table
taxonomy

###sample data
meta16s=read.csv("metadata_16s.csv",sep=",",row.names=1) ### metadata

tax16s=read.csv("taxonomy_16s.csv",sep=",",row.names=1) ### taxonomy

asv16s=read.csv("otu_table_16s.csv",sep=",",row.names=1) ### OTU table (named asv due to code syntax)

### preparing to phyloseq format
tax16s=as.matrix(tax16s)

asv16s=data.matrix(asv16s)

### Obtaining 3 files to merge
OTU=otu_table(asv16s,taxa_are_rows=TRUE)

TAX=tax_table(tax16s)

META=sample_data(meta16s)

Testing that the tables are in the same length

tax_names<-taxa_names(TAX)
otu_names<-taxa_names(OTU) ### These lists are not shown due to their length (over 8000 values)

sample_names(OTU)

##  [1] "A001_220406_3"     "A002_220419_2"     "A003_220407_2"    
##  [4] "A004_220321_3"     "A005_220405_2"     "A006_220403_3"    
##  [7] "A007_220328_2"     "A008_220511_srf_2" "A009_220404_2"    
## [10] "A010_220407_3"     "A011_220404_3"     "A012_220405_3"    
## [13] "A013_220406_2"     "A014_220321_1"     "A015_220419_3"    
## [16] "A016_220403_2"     "A017_220328_1"     "A018_220511_dcm_2"
## [19] "A019_210331_1"     "A020_210422_2"     "A021_210422_1"    
## [22] "A022_210310_1"     "A023_210310_2"     "A024_210404_1"    
## [25] "A025_210404_3"     "A026_210414_2"     "A027_210414_3"    
## [28] "A029_210331_2"     "A030_210620_srf_1" "A031_210620_srf_2"
## [31] "A032_210620_dcm_1" "A033_210620_dcm_2" "A034_210817_srf_1"
## [34] "A035_210817_srf_2" "A036_210817_dcm_1" "A037_210817_dcm_2"
## [37] "A038_210427_srf_1" "A039_210427_srf_2" "A040_210427_dcm_1"
## [40] "A041_210427_dcm_2" "A042_210512_srf_1" "A043_210512_srf_2"
## [43] "A044_210512_dcm_1" "A045_210512_dcm_2" "A046_211010_3"    
## [46] "A047_210224_1"     "A048_210224_3"     "A049_210322_2"    
## [49] "A050_210322_3"     "A051_210419_2"     "A052_210419_3"    
## [52] "A053_211010_2"     "A054_211019_2"     "A055_211019_3"    
## [55] "A056_201216_1"     "A057_201216_2"     "A058_210107_3"    
## [58] "A059_210127_1"     "A060_210127_2"     "A061_210210_1"    
## [61] "A062_210210_2"     "A063_210315_1"     "A064_210315_2"    
## [64] "A066_201125_1"     "A067_201125_2"     "A068_201230_1"    
## [67] "A069_201230_2"     "A070_210107_1"     "A071_210324_1"    
## [70] "A072_210324_2"     "A073_220308_2"     "A074_220308_3"    
## [73] "A075_211208_1"     "A076_211208_2"     "A077_211222_1"    
## [76] "A078_220104_1"     "A079_220104_3"     "A080_220214_1"    
## [79] "A081_220214_2"     "A082_220411_1"     "A083_220411_2"    
## [82] "A084_211103_1"     "A085_211103_3"     "A086_211116_2"    
## [85] "A087_211116_3"     "A088_211222_2"     "A089_220118_2"    
## [88] "A090_220118_3"     "A091_220207_1"     "A092_220207_2"    
## [91] "A093_220315_2"     "A094_220315_3"     "A095_220511_srf_1"
## [94] "A096_220511_dcm_1"

sample_names(META)

##  [1] "A001_220406_3"     "A002_220419_2"     "A003_220407_2"    
##  [4] "A004_220321_3"     "A005_220405_2"     "A006_220403_3"    
##  [7] "A007_220328_2"     "A008_220511_srf_2" "A009_220404_2"    
## [10] "A010_220407_3"     "A011_220404_3"     "A012_220405_3"    
## [13] "A013_220406_2"     "A014_220321_1"     "A015_220419_3"    
## [16] "A016_220403_2"     "A017_220328_1"     "A018_220511_dcm_2"
## [19] "A019_210331_1"     "A020_210422_2"     "A021_210422_1"    
## [22] "A022_210310_1"     "A023_210310_2"     "A024_210404_1"    
## [25] "A025_210404_3"     "A026_210414_2"     "A027_210414_3"    
## [28] "A029_210331_2"     "A030_210620_srf_1" "A031_210620_srf_2"
## [31] "A032_210620_dcm_1" "A033_210620_dcm_2" "A034_210817_srf_1"
## [34] "A035_210817_srf_2" "A036_210817_dcm_1" "A037_210817_dcm_2"
## [37] "A038_210427_srf_1" "A039_210427_srf_2" "A040_210427_dcm_1"
## [40] "A041_210427_dcm_2" "A042_210512_srf_1" "A043_210512_srf_2"
## [43] "A044_210512_dcm_1" "A045_210512_dcm_2" "A046_211010_3"    
## [46] "A047_210224_1"     "A048_210224_3"     "A049_210322_2"    
## [49] "A050_210322_3"     "A051_210419_2"     "A052_210419_3"    
## [52] "A053_211010_2"     "A054_211019_2"     "A055_211019_3"    
## [55] "A056_201216_1"     "A057_201216_2"     "A058_210107_3"    
## [58] "A059_210127_1"     "A060_210127_2"     "A061_210210_1"    
## [61] "A062_210210_2"     "A063_210315_1"     "A064_210315_2"    
## [64] "A066_201125_1"     "A067_201125_2"     "A068_201230_1"    
## [67] "A069_201230_2"     "A070_210107_1"     "A071_210324_1"    
## [70] "A072_210324_2"     "A073_220308_2"     "A074_220308_3"    
## [73] "A075_211208_1"     "A076_211208_2"     "A077_211222_1"    
## [76] "A078_220104_1"     "A079_220104_3"     "A080_220214_1"    
## [79] "A081_220214_2"     "A082_220411_1"     "A083_220411_2"    
## [82] "A084_211103_1"     "A085_211103_3"     "A086_211116_2"    
## [85] "A087_211116_3"     "A088_211222_2"     "A089_220118_2"    
## [88] "A090_220118_3"     "A091_220207_1"     "A092_220207_2"    
## [91] "A093_220315_2"     "A094_220315_3"     "A095_220511_srf_1"
## [94] "A096_220511_dcm_1"

Merging the tables to a phyloseq file

mon = phyloseq(OTU,TAX,META)

Next, a cleanup from artifacts in the data

# remove bad esvs
mon= subset_taxa(mon, Kingdom != "d__Eukaryota")
mon = subset_taxa(mon, Kingdom != "Unassigned")
mon = subset_taxa(mon, Order != "o__Chloroplast")
mon = subset_taxa(mon, Family!= "f__Mitochondria")

Raw data of OTUs abundance (phylum level)

raw_data<-datatable(otutax)

Relative abundance (phylum level)

This plot contains only surface samples

Top 10 phyla

Relative abundance (family level)

(of phylum cyanobacteria)

Relative abundance (species level)

(of phylum proteobacteria)

Heatmap

Code:

obj <- mon1
otutable <- data.frame(OTU = rownames(phyloseq::otu_table(obj)@.Data),
                       phyloseq::otu_table(obj)@.Data,
                       phyloseq::tax_table(obj)@.Data,
                       check.names = FALSE
)

#Extract metadata from the phyloseq object:
metadata <- data.frame(phyloseq::sample_data(obj), 
                       check.names = FALSE
)

#Load the data with amp_load:
##write.csv (otutable, "asv.csv")
##write.csv (metadata, "meta.csv")


myotutable <- read.csv("asv.csv", check.names = FALSE) #remove first column from the asv table
mymetadata <- read.csv("meta.csv", check.names = FALSE) # add sampleID to first column of metadata CSV

forheatmap <- amp_load(myotutable, mymetadata)

## 484 OTUs with 0 total abundance across all samples have been removed.

Create datasets for bacteria and archaea:

arch <- amp_subset_taxa(forheatmap, tax_vector = "d__Archaea", normalise = TRUE, remove = FALSE)

## 5546 OTUs have been filtered 
## Before: 5768 OTUs
## After: 222 OTUs

bac <- amp_subset_taxa(forheatmap, tax_vector = "d__Bacteria", normalise = TRUE, remove = FALSE)

## 222 OTUs have been filtered 
## Before: 5768 OTUs
## After: 5546 OTUs

Heatmaps are divided by months, each month contain 2 years of sampling, to detect differences in abundance. The 2 heatmaps shows:

Bacteria
Archaea

alpha diversity (rarefied example)

Before testing alpha diversity indices, the data (including surface & dcm) was rarefied

rar <- rarefy_even_depth(mon1, sample.size = min(sample_sums(mon)),
                         rngseed = FALSE, replace = FALSE, trimOTUs = TRUE, verbose = TRUE)

## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`

## ...

## 1311OTUs were removed because they are no longer 
## present in any sample after random subsampling

## ...

The 3 indices are:

ACE (richness)
Shannon (diversity)
Simpson (eveness)

Rarefaction curves were plotted as well (color-coded by ACE index):

beta diversity

Comparison based on difference between months

data_grp <- read.csv("metadata_16s_srf.csv", header=TRUE, stringsAsFactors = TRUE)
data_otu_filt_rar = t(data.frame(otu_table(mon1)))

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = data_otu_filt_rar ~ month, data = data_grp, permutations = 9999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)    
## month     1   1.5520 0.15318 14.832 0.0001 ***
## Residual 82   8.5803 0.84682                  
## Total    83  10.1323 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ps_norm_df <- as(sample_data(mon1), "data.frame")
ps.dist <- phyloseq::distance(mon1, method="bray")
pa<-pairwise.adonis(ps.dist,ps_norm_df$month)

##       pairs Df  SumsOfSqs   F.Model         R2    p.value p.adjusted sig
## 1    4 vs 3  1 0.66465277  7.204914 0.15263060 0.00100000      0.045   .
## 2    4 vs 5  1 0.67324322  6.016810 0.18792660 0.00100000      0.045   .
## 3    4 vs 6  1 0.56029318  4.820213 0.16725112 0.00100000      0.045   .
## 4    4 vs 8  1 0.71195300  6.124614 0.20330928 0.00500000      0.225    
## 5   4 vs 10  1 0.96298399  8.922327 0.25549062 0.00100000      0.045   .
## 6    4 vs 2  1 0.53819969  5.304325 0.15024575 0.00100000      0.045   .
## 7   4 vs 12  1 0.80801639  7.891149 0.20825838 0.00100000      0.045   .
## 8    4 vs 1  1 0.68477767  6.674753 0.18199859 0.00100000      0.045   .
## 9   4 vs 11  1 1.03749587  9.684885 0.25699653 0.00100000      0.045   .
## 10   3 vs 5  1 0.89407493 16.963204 0.45892136 0.00100000      0.045   .
## 11   3 vs 6  1 0.60843266 11.717950 0.39430546 0.00300000      0.135    
## 12   3 vs 8  1 0.73350722 14.124508 0.43968014 0.00700000      0.315    
## 13  3 vs 10  1 0.89609410 18.843916 0.48511885 0.00100000      0.045   .
## 14   3 vs 2  1 0.09490400  1.915918 0.07392822 0.09000000      1.000    
## 15  3 vs 12  1 0.62362963 12.300830 0.33885809 0.00100000      0.045   .
## 16   3 vs 1  1 0.30164277  5.921026 0.19788846 0.00100000      0.045   .
## 17  3 vs 11  1 0.94851564 18.234221 0.45320179 0.00100000      0.045   .
## 18   5 vs 6  1 0.16164213  4.202427 0.51233943 0.13333333      1.000    
## 19   5 vs 8  1 0.26751024  6.948002 0.63463653 0.06666667      1.000    
## 20  5 vs 10  1 0.33282783 11.725514 0.66150489 0.02900000      1.000    
## 21   5 vs 2  1 0.72079111 17.663538 0.63851334 0.00400000      0.180    
## 22  5 vs 12  1 0.47590567 10.915372 0.52188277 0.00500000      0.225    
## 23   5 vs 1  1 0.62822789 14.216393 0.58705659 0.00300000      0.135    
## 24  5 vs 11  1 0.35944389  7.907574 0.49709490 0.00200000      0.090    
## 25   6 vs 8  1 0.10111454  5.863602 0.74566364 0.33333333      1.000    
## 26  6 vs 10  1 0.11690390  9.206587 0.69712083 0.06666667      1.000    
## 27   6 vs 2  1 0.54429724 15.090596 0.65353861 0.03500000      1.000    
## 28  6 vs 12  1 0.35946524  9.086650 0.53179821 0.02700000      1.000    
## 29   6 vs 1  1 0.47710766 11.839424 0.59676249 0.02700000      1.000    
## 30  6 vs 11  1 0.17814787  4.378405 0.42187650 0.03800000      1.000    
## 31  8 vs 10  1 0.11174113  8.773915 0.68686186 0.06666667      1.000    
## 32   8 vs 2  1 0.64216794 17.794740 0.68985925 0.02100000      0.945    
## 33  8 vs 12  1 0.37520067  9.479890 0.54233122 0.01900000      0.855    
## 34   8 vs 1  1 0.53734722 13.328026 0.62490668 0.02100000      0.945    
## 35  8 vs 11  1 0.17246322  4.236071 0.41383761 0.02900000      1.000    
## 36  10 vs 2  1 0.70746147 23.195257 0.69875214 0.00200000      0.090    
## 37 10 vs 12  1 0.35569963 10.683877 0.51653164 0.00300000      0.135    
## 38  10 vs 1  1 0.56422678 16.651753 0.62479016 0.00600000      0.270    
## 39 10 vs 11  1 0.09449954  2.901199 0.26613580 0.01700000      0.765    
## 40  2 vs 12  1 0.34544539  8.474381 0.37706850 0.00100000      0.045   .
## 41   2 vs 1  1 0.09345006  2.269004 0.13946792 0.04900000      1.000    
## 42  2 vs 11  1 0.65814379 15.848121 0.56909121 0.00100000      0.045   .
## 43  12 vs 1  1 0.14368415  3.327532 0.19203728 0.00800000      0.360    
## 44 12 vs 11  1 0.20257691  4.619183 0.27794284 0.00300000      0.135    
## 45  1 vs 11  1 0.45433985 10.244899 0.46055049 0.00100000      0.045   .

Differential expression of abundance (Family level) between the years of sampling

physeq <- taxa_level(mon1,"Family")
NB_sig <- differential_abundance(physeq, grouping_column = "year",output_norm="log-relative",pvalue.threshold=0.05,lfc.threshold=0, filename = "1.csv")
p<-plot_signif(NB_sig$plotdata, top.taxa=30)
p <- plot_MA(NB_sig$SignFeaturesTable, label=TRUE)

NMDS

The code for phyloseq ordination

sems.ord <- ordinate(mon1, "NMDS", "bray")

## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.09274357 
## Run 1 stress 0.1017237 
## Run 2 stress 0.1444514 
## Run 3 stress 0.09274357 
## ... New best solution
## ... Procrustes: rmse 0.000004397437  max resid 0.00002287361 
## ... Similar to previous best
## Run 4 stress 0.09274357 
## ... New best solution
## ... Procrustes: rmse 0.00001206128  max resid 0.00008189235 
## ... Similar to previous best
## Run 5 stress 0.1017237 
## Run 6 stress 0.09274357 
## ... Procrustes: rmse 0.00000875416  max resid 0.00005264231 
## ... Similar to previous best
## Run 7 stress 0.133378 
## Run 8 stress 0.09274357 
## ... New best solution
## ... Procrustes: rmse 0.000005337812  max resid 0.0000252863 
## ... Similar to previous best
## Run 9 stress 0.1417975 
## Run 10 stress 0.09582354 
## Run 11 stress 0.1594529 
## Run 12 stress 0.09274357 
## ... Procrustes: rmse 0.000006934365  max resid 0.00005192101 
## ... Similar to previous best
## Run 13 stress 0.1195385 
## Run 14 stress 0.1159907 
## Run 15 stress 0.09582354 
## Run 16 stress 0.1195385 
## Run 17 stress 0.09582354 
## Run 18 stress 0.171139 
## Run 19 stress 0.09582354 
## Run 20 stress 0.09274357 
## ... Procrustes: rmse 0.000003443438  max resid 0.00001955893 
## ... Similar to previous best
## *** Solution reached

## p1 = plot_ordination(mon1, sems.ord, type="samples", color="shannon") <-The code might not work. One solution is to start the session again, and work only with the relevant packages to run this code.

Samples are color-coded by Shannon index

Metabarcoding: bacterial dynamics in the GoA 2020-22

Yoav Avrahami

02/28/2023

~~~

phyloseq analysis of 16S

Raw data of OTUs abundance (phylum level)

Relative abundance (phylum level)

Relative abundance (family level)

Relative abundance (family level)

Relative abundance (species level)

Heatmap

alpha diversity (rarefied example)

beta diversity

NMDS