~~~

 

phyloseq analysis of 18S

 

Samples were sequenced and prepared beforehand with Qiime2. This project begins after obtaining 3 files for phyloseq analysis:

###sample data
meta18s=read.csv("metadata_18s.csv",sep=",",row.names=1) ### metadata

tax18s=read.csv("pr2.csv",sep=",",row.names=1) ### taxonomy

asv18s=read.csv("otu_table_18s.csv",sep=",",row.names=1) ### OTU table (named asv due to code syntax)

### preparing to phyloseq format
tax18s=as.matrix(tax18s)

asv18s=data.matrix(asv18s)

### Obtaining 3 files to merge
OTU=otu_table(asv18s,taxa_are_rows=TRUE)

TAX=tax_table(tax18s)

META=sample_data(meta18s)

 

Testing that the tables are in the same length

 

tax_names<-taxa_names(TAX)
otu_names<-taxa_names(OTU) ### These lists are not shown due to their length (over 8000 values)
sample_names(OTU)
##  [1] "A097_220406_3"     "A098_220419_2"     "A099_220407_2"    
##  [4] "A100_220321_3"     "A101_220405_2"     "A102_220403_3"    
##  [7] "A103_220328_2"     "A104_220511_srf_2" "A105_220404_2"    
## [10] "A106_220407_3"     "A107_220404_3"     "A108_220405_3"    
## [13] "A109_220406_2"     "A110_220321_1"     "A111_220419_3"    
## [16] "A112_220403_2"     "A113_220328_1"     "A114_220511_dcm_2"
## [19] "A115_210331_1"     "A116_210422_2"     "A117_210422_1"    
## [22] "A118_210310_1"     "A119_210310_2"     "A120_210404_1"    
## [25] "A121_210404_3"     "A122_210414_2"     "A123_210414_3"    
## [28] "A125_210331_2"     "A126_210620_srf_1" "A127_210620_srf_2"
## [31] "A128_210620_dcm_1" "A129_210620_dcm_2" "A130_210817_srf_1"
## [34] "A131_210817_srf_2" "A132_210817_dcm_1" "A133_210817_dcm_2"
## [37] "A134_210427_srf_1" "A135_210427_srf_2" "A136_210427_dcm_1"
## [40] "A137_210427_dcm_2" "A138_210512_srf_1" "A139_210512_srf_2"
## [43] "A140_210512_dcm_1" "A141_210512_dcm_2" "A142_211010_3"    
## [46] "A143_210224_1"     "A144_210224_3"     "A145_210322_2"    
## [49] "A146_210322_3"     "A147_210419_2"     "A148_210419_3"    
## [52] "A149_211010_2"     "A150_211019_2"     "A151_211019_3"    
## [55] "A152_201216_1"     "A153_201216_2"     "A154_210107_3"    
## [58] "A155_210127_1"     "A156_210127_2"     "A157_210210_1"    
## [61] "A158_210210_2"     "A159_210315_1"     "A160_210315_2"    
## [64] "A162_201125_1"     "A163_201125_2"     "A164_201230_1"    
## [67] "A165_201230_2"     "A166_210107_1"     "A167_210324_1"    
## [70] "A168_210324_2"     "A169_220308_2"     "A170_220308_3"    
## [73] "A171_211208_1"     "A172_211208_2"     "A173_211222_1"    
## [76] "A174_220104_1"     "A175_220104_3"     "A176_220214_1"    
## [79] "A177_220214_2"     "A178_220411_1"     "A179_220411_2"    
## [82] "A180_211103_1"     "A181_211103_3"     "A182_211116_2"    
## [85] "A183_211116_3"     "A184_211222_2"     "A185_220118_2"    
## [88] "A186_220118_3"     "A187_220207_1"     "A188_220207_2"    
## [91] "A189_220315_2"     "A190_220315_3"     "A191_220511_srf_1"
## [94] "A192_220511_dcm_1"
sample_names(META)
##  [1] "A097_220406_3"     "A098_220419_2"     "A099_220407_2"    
##  [4] "A100_220321_3"     "A101_220405_2"     "A102_220403_3"    
##  [7] "A103_220328_2"     "A104_220511_srf_2" "A105_220404_2"    
## [10] "A106_220407_3"     "A107_220404_3"     "A108_220405_3"    
## [13] "A109_220406_2"     "A110_220321_1"     "A111_220419_3"    
## [16] "A112_220403_2"     "A113_220328_1"     "A114_220511_dcm_2"
## [19] "A115_210331_1"     "A116_210422_2"     "A117_210422_1"    
## [22] "A118_210310_1"     "A119_210310_2"     "A120_210404_1"    
## [25] "A121_210404_3"     "A122_210414_2"     "A123_210414_3"    
## [28] "A125_210331_2"     "A126_210620_srf_1" "A127_210620_srf_2"
## [31] "A128_210620_dcm_1" "A129_210620_dcm_2" "A130_210817_srf_1"
## [34] "A131_210817_srf_2" "A132_210817_dcm_1" "A133_210817_dcm_2"
## [37] "A134_210427_srf_1" "A135_210427_srf_2" "A136_210427_dcm_1"
## [40] "A137_210427_dcm_2" "A138_210512_srf_1" "A139_210512_srf_2"
## [43] "A140_210512_dcm_1" "A141_210512_dcm_2" "A142_211010_3"    
## [46] "A143_210224_1"     "A144_210224_3"     "A145_210322_2"    
## [49] "A146_210322_3"     "A147_210419_2"     "A148_210419_3"    
## [52] "A149_211010_2"     "A150_211019_2"     "A151_211019_3"    
## [55] "A152_201216_1"     "A153_201216_2"     "A154_210107_3"    
## [58] "A155_210127_1"     "A156_210127_2"     "A157_210210_1"    
## [61] "A158_210210_2"     "A159_210315_1"     "A160_210315_2"    
## [64] "A162_201125_1"     "A163_201125_2"     "A164_201230_1"    
## [67] "A165_201230_2"     "A166_210107_1"     "A167_210324_1"    
## [70] "A168_210324_2"     "A169_220308_2"     "A170_220308_3"    
## [73] "A171_211208_1"     "A172_211208_2"     "A173_211222_1"    
## [76] "A174_220104_1"     "A175_220104_3"     "A176_220214_1"    
## [79] "A177_220214_2"     "A178_220411_1"     "A179_220411_2"    
## [82] "A180_211103_1"     "A181_211103_3"     "A182_211116_2"    
## [85] "A183_211116_3"     "A184_211222_2"     "A185_220118_2"    
## [88] "A186_220118_3"     "A187_220207_1"     "A188_220207_2"    
## [91] "A189_220315_2"     "A190_220315_3"     "A191_220511_srf_1"
## [94] "A192_220511_dcm_1"

 

Merging the tables to a phyloseq file

 

mon = phyloseq(OTU,TAX,META)

 

Next, a cleanup from artifacts in the data

 

# remove bad esvs

mon = subset_taxa(mon, Kingdom != "Unassigned")

 

Raw data of OTUs abundance (phylum level)

 

raw_data<-datatable(otutax)

 

 

Relative abundance (phylum level)

 

This plot contains only surface samples

 

Top 10 phyla

 

 

Top phytoplankton phyla (Metazoans excluded) plotted in context of chlorophyll levels at each time point (Bar level= chlorophyll, Bar fill= Relative abundance of top phyto phyla)

 

 

Phylum: Chlorophyta

 

top 10 families

 

 

Main families are Bathycoccaceae and Mamiellaceae. Bathycoccaceae is composed of 2 main genera (Species level is not informative in this case)

 

 

Mamiellaceae is dominated by species of Micromonas:

 

 

Phylum: Ciliophora

 

 

Two Families contain additional information on Genus level: Dictyocystidae and Tintinnidae

 

 

 

Phylum: Cryptophyta

 

 

The family ‘Cryptomonadales_X’ is informative on genus level:

 

 

Phylum: Dinoflagellata

 

 

Most families are not informative in higher taxonomic level, except Ceratiaceae and Gymnodiniaceae.
Within Ceratiaceae, species of the genus Tripos are the most dominant:

 

 

Within Gymnodiniaceae, there are two main genera

 

 

Only genus gyrodinium is informative on the species level

 

 

Phylum: Haptophyta

 

 

The family Prymnesiaceae is informative on genus level

 

 

Note: pr2 database does not show species such as Emiliania huxleyi

Therefore, the data was analysed again with Silva database, for comparison. Silva database contains E.hux identification. Need to assess what database is more appropriate to use.

Phylum: Haptophyta (Silva database)

 

 

The Prymnesiales family is informative on genus level:

 

 

Phylum: Metazoa

 

 

To inspect if there is not a contamination of human DNA in the samples, zooming in on top 10 families within Metazoa:

 

 

Apparently the most dominant family is Maxillopoda (arthropods, crustacea), which is composed of several genera:

 

 

Phylum: Ochrophyta

 

 

Phylum: Opalozoa

 

 

Phylum: Radiolaria

 

 

Phylum: Sagenista

 

 

Relative abundance of diatoms (class level)

 

 

Relative abundance of diatoms (Genus level)

 

top 10 Genus

 

 

Heatmap

 

Code:

 

obj <- mon1
otutable <- data.frame(OTU = rownames(phyloseq::otu_table(obj)@.Data),
                       phyloseq::otu_table(obj)@.Data,
                       phyloseq::tax_table(obj)@.Data,
                       check.names = FALSE
)

#Extract metadata from the phyloseq object:
metadata <- data.frame(phyloseq::sample_data(obj), 
                       check.names = FALSE
)

#Load the data with amp_load:
##write.csv (otutable, "asv.csv")
##write.csv (metadata, "meta.csv")


myotutable <- read.csv("asv.csv", check.names = FALSE) #remove first column from the asv table
mymetadata <- read.csv("meta.csv", check.names = FALSE) # add sampleID to first column of metadata CSV

forheatmap <- amp_load(myotutable, mymetadata)
## 1088 OTUs with 0 total abundance across all samples have been removed.

 

 

 

alpha diversity (rarefied example)

 

Before testing alpha diversity indices, the data (including surface & dcm) was rarefied

 

rar <- rarefy_even_depth(mon1, sample.size = min(sample_sums(mon)),
                         rngseed = FALSE, replace = FALSE, trimOTUs = TRUE, verbose = TRUE)
## You set `rngseed` to FALSE. Make sure you've set & recorded
##  the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 1062OTUs were removed because they are no longer 
## present in any sample after random subsampling
## ...

 

The 3 indices are:

 

 

Rarefaction curves were plotted as well (color-coded by ACE index):

 

 

beta diversity

 

Comparison based on difference between months

 

data_grp <- read.csv("metadata_18s_srf.csv", header=TRUE, stringsAsFactors = TRUE)
data_otu_filt_rar = t(data.frame(otu_table(mon1)))

 

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = data_otu_filt_rar ~ month, data = data_grp, permutations = 9999, method = "bray")
##          Df SumOfSqs      R2      F Pr(>F)    
## month     1   1.9867 0.09135 8.2437 0.0001 ***
## Residual 82  19.7618 0.90865                  
## Total    83  21.7485 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 

ps_norm_df <- as(sample_data(mon1), "data.frame")
ps.dist <- phyloseq::distance(mon1, method="bray")
pa<-pairwise.adonis(ps.dist,ps_norm_df$month)

 

##       pairs Df SumsOfSqs  F.Model        R2    p.value p.adjusted sig
## 1    4 vs 3  1 0.9822401 4.689232 0.1049298 0.00100000      0.045   .
## 2    4 vs 5  1 0.8539472 3.684620 0.1241256 0.00100000      0.045   .
## 3    4 vs 6  1 0.7539076 3.266669 0.1198045 0.00100000      0.045   .
## 4    4 vs 8  1 0.8188600 3.557772 0.1291023 0.00500000      0.225    
## 5   4 vs 10  1 1.3199165 5.909946 0.1852070 0.00100000      0.045   .
## 6    4 vs 2  1 1.1548582 5.283730 0.1497497 0.00100000      0.045   .
## 7   4 vs 12  1 1.8854915 8.884302 0.2284804 0.00100000      0.045   .
## 8    4 vs 1  1 1.5809122 7.200763 0.1935649 0.00100000      0.045   .
## 9   4 vs 11  1 1.6108865 7.338148 0.2076551 0.00100000      0.045   .
## 10   3 vs 5  1 1.1662375 6.775789 0.2530566 0.00100000      0.045   .
## 11   3 vs 6  1 0.8257629 5.029145 0.2183818 0.00900000      0.405    
## 12   3 vs 8  1 0.8914104 5.456742 0.2326300 0.00400000      0.180    
## 13  3 vs 10  1 1.2855474 7.976338 0.2851102 0.00100000      0.045   .
## 14   3 vs 2  1 0.5049262 3.049630 0.1127420 0.00200000      0.090    
## 15  3 vs 12  1 1.4871142 9.433439 0.2821558 0.00100000      0.045   .
## 16   3 vs 1  1 1.0729710 6.432928 0.2113805 0.00100000      0.045   .
## 17  3 vs 11  1 1.4625183 9.029838 0.2910050 0.00100000      0.045   .
## 18   5 vs 6  1 0.3455834 2.293985 0.3644726 0.13333333      1.000    
## 19   5 vs 8  1 0.4583437 3.120412 0.4382347 0.13333333      1.000    
## 20  5 vs 10  1 0.5637045 3.885498 0.3930503 0.03200000      1.000    
## 21   5 vs 2  1 0.9538350 5.885166 0.3704819 0.00300000      0.135    
## 22  5 vs 12  1 0.8861233 6.194498 0.3825063 0.00200000      0.090    
## 23   5 vs 1  1 0.9545622 5.784813 0.3664796 0.00300000      0.135    
## 24  5 vs 11  1 0.6999594 4.626671 0.3664205 0.00600000      0.270    
## 25   6 vs 8  1 0.3053544 6.064975 0.7520141 0.33333333      1.000    
## 26  6 vs 10  1 0.2693405 2.808363 0.4124873 0.06666667      1.000    
## 27   6 vs 2  1 0.6735032 4.751780 0.3726366 0.02000000      0.900    
## 28  6 vs 12  1 0.6054643 5.132943 0.3908448 0.03500000      1.000    
## 29   6 vs 1  1 0.6953242 4.781861 0.3741131 0.01900000      0.855    
## 30  6 vs 11  1 0.4080339 3.384042 0.3606167 0.03400000      1.000    
## 31  8 vs 10  1 0.3556867 3.860096 0.4911004 0.06666667      1.000    
## 32   8 vs 2  1 0.7426825 5.310337 0.3989634 0.02300000      1.000    
## 33  8 vs 12  1 0.5607226 4.830669 0.3764940 0.03600000      1.000    
## 34   8 vs 1  1 0.7163020 4.990689 0.3841743 0.03000000      1.000    
## 35  8 vs 11  1 0.4239505 3.590737 0.3743963 0.03400000      1.000    
## 36  10 vs 2  1 0.9389777 6.698467 0.4011426 0.00400000      0.180    
## 37 10 vs 12  1 0.7976708 6.583961 0.3970078 0.00300000      0.135    
## 38  10 vs 1  1 0.9067406 6.335733 0.3878451 0.00400000      0.180    
## 39 10 vs 11  1 0.4080452 3.292882 0.2915891 0.00900000      0.405    
## 40  2 vs 12  1 0.6126791 4.372250 0.2379812 0.00200000      0.090    
## 41   2 vs 1  1 0.3505807 2.249967 0.1384598 0.01200000      0.540    
## 42  2 vs 11  1 0.8132083 5.603152 0.3183039 0.00100000      0.045   .
## 43  12 vs 1  1 0.3256118 2.289379 0.1405443 0.00300000      0.135    
## 44 12 vs 11  1 0.4391741 3.397065 0.2206307 0.00100000      0.045   .
## 45  1 vs 11  1 0.7321084 4.960694 0.2924818 0.00100000      0.045   .

 

Differential expression of abundance (Family level) between the years of sampling

 

physeq <- taxa_level(mon1,"Family")
NB_sig <- differential_abundance(physeq, grouping_column = "year",output_norm="log-relative",pvalue.threshold=0.05,lfc.threshold=0, filename = "1.csv")
p<-plot_signif(NB_sig$plotdata, top.taxa=30)
p <- plot_MA(NB_sig$SignFeaturesTable, label=TRUE)

 

 

NMDS

 

The code for phyloseq ordination

 

sems.ord <- ordinate(mon1, "NMDS", "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.09653153 
## Run 1 stress 0.09653153 
## ... Procrustes: rmse 0.00001742751  max resid 0.00006475532 
## ... Similar to previous best
## Run 2 stress 0.1407628 
## Run 3 stress 0.1270118 
## Run 4 stress 0.1285715 
## Run 5 stress 0.1137767 
## Run 6 stress 0.1313194 
## Run 7 stress 0.1364165 
## Run 8 stress 0.1253753 
## Run 9 stress 0.1364165 
## Run 10 stress 0.09653153 
## ... New best solution
## ... Procrustes: rmse 0.000004076763  max resid 0.00001908302 
## ... Similar to previous best
## Run 11 stress 0.1238915 
## Run 12 stress 0.09653153 
## ... Procrustes: rmse 0.000001142074  max resid 0.000005528667 
## ... Similar to previous best
## Run 13 stress 0.1382325 
## Run 14 stress 0.09653153 
## ... Procrustes: rmse 0.000008312389  max resid 0.00002685398 
## ... Similar to previous best
## Run 15 stress 0.09653153 
## ... New best solution
## ... Procrustes: rmse 0.000007371869  max resid 0.00002529074 
## ... Similar to previous best
## Run 16 stress 0.1403878 
## Run 17 stress 0.1273168 
## Run 18 stress 0.09653153 
## ... Procrustes: rmse 0.000007315178  max resid 0.0000243654 
## ... Similar to previous best
## Run 19 stress 0.1402953 
## Run 20 stress 0.1382556 
## *** Solution reached
## p1 = plot_ordination(mon1, sems.ord, type="samples", color="shannon") <-The code might not work. One solution is to start the session again, and work only with the relevant packages to run this code.

 

Samples are color-coded by Shannon index