Samples were sequenced and prepared beforehand with Qiime2. This project begins after obtaining 3 files for phyloseq analysis:
###sample data
meta18s=read.csv("metadata_18s.csv",sep=",",row.names=1) ### metadata
tax18s=read.csv("pr2.csv",sep=",",row.names=1) ### taxonomy
asv18s=read.csv("otu_table_18s.csv",sep=",",row.names=1) ### OTU table (named asv due to code syntax)
### preparing to phyloseq format
tax18s=as.matrix(tax18s)
asv18s=data.matrix(asv18s)
### Obtaining 3 files to merge
OTU=otu_table(asv18s,taxa_are_rows=TRUE)
TAX=tax_table(tax18s)
META=sample_data(meta18s)
Testing that the tables are in the same length
tax_names<-taxa_names(TAX)
otu_names<-taxa_names(OTU) ### These lists are not shown due to their length (over 8000 values)
sample_names(OTU)
## [1] "A097_220406_3" "A098_220419_2" "A099_220407_2"
## [4] "A100_220321_3" "A101_220405_2" "A102_220403_3"
## [7] "A103_220328_2" "A104_220511_srf_2" "A105_220404_2"
## [10] "A106_220407_3" "A107_220404_3" "A108_220405_3"
## [13] "A109_220406_2" "A110_220321_1" "A111_220419_3"
## [16] "A112_220403_2" "A113_220328_1" "A114_220511_dcm_2"
## [19] "A115_210331_1" "A116_210422_2" "A117_210422_1"
## [22] "A118_210310_1" "A119_210310_2" "A120_210404_1"
## [25] "A121_210404_3" "A122_210414_2" "A123_210414_3"
## [28] "A125_210331_2" "A126_210620_srf_1" "A127_210620_srf_2"
## [31] "A128_210620_dcm_1" "A129_210620_dcm_2" "A130_210817_srf_1"
## [34] "A131_210817_srf_2" "A132_210817_dcm_1" "A133_210817_dcm_2"
## [37] "A134_210427_srf_1" "A135_210427_srf_2" "A136_210427_dcm_1"
## [40] "A137_210427_dcm_2" "A138_210512_srf_1" "A139_210512_srf_2"
## [43] "A140_210512_dcm_1" "A141_210512_dcm_2" "A142_211010_3"
## [46] "A143_210224_1" "A144_210224_3" "A145_210322_2"
## [49] "A146_210322_3" "A147_210419_2" "A148_210419_3"
## [52] "A149_211010_2" "A150_211019_2" "A151_211019_3"
## [55] "A152_201216_1" "A153_201216_2" "A154_210107_3"
## [58] "A155_210127_1" "A156_210127_2" "A157_210210_1"
## [61] "A158_210210_2" "A159_210315_1" "A160_210315_2"
## [64] "A162_201125_1" "A163_201125_2" "A164_201230_1"
## [67] "A165_201230_2" "A166_210107_1" "A167_210324_1"
## [70] "A168_210324_2" "A169_220308_2" "A170_220308_3"
## [73] "A171_211208_1" "A172_211208_2" "A173_211222_1"
## [76] "A174_220104_1" "A175_220104_3" "A176_220214_1"
## [79] "A177_220214_2" "A178_220411_1" "A179_220411_2"
## [82] "A180_211103_1" "A181_211103_3" "A182_211116_2"
## [85] "A183_211116_3" "A184_211222_2" "A185_220118_2"
## [88] "A186_220118_3" "A187_220207_1" "A188_220207_2"
## [91] "A189_220315_2" "A190_220315_3" "A191_220511_srf_1"
## [94] "A192_220511_dcm_1"
sample_names(META)
## [1] "A097_220406_3" "A098_220419_2" "A099_220407_2"
## [4] "A100_220321_3" "A101_220405_2" "A102_220403_3"
## [7] "A103_220328_2" "A104_220511_srf_2" "A105_220404_2"
## [10] "A106_220407_3" "A107_220404_3" "A108_220405_3"
## [13] "A109_220406_2" "A110_220321_1" "A111_220419_3"
## [16] "A112_220403_2" "A113_220328_1" "A114_220511_dcm_2"
## [19] "A115_210331_1" "A116_210422_2" "A117_210422_1"
## [22] "A118_210310_1" "A119_210310_2" "A120_210404_1"
## [25] "A121_210404_3" "A122_210414_2" "A123_210414_3"
## [28] "A125_210331_2" "A126_210620_srf_1" "A127_210620_srf_2"
## [31] "A128_210620_dcm_1" "A129_210620_dcm_2" "A130_210817_srf_1"
## [34] "A131_210817_srf_2" "A132_210817_dcm_1" "A133_210817_dcm_2"
## [37] "A134_210427_srf_1" "A135_210427_srf_2" "A136_210427_dcm_1"
## [40] "A137_210427_dcm_2" "A138_210512_srf_1" "A139_210512_srf_2"
## [43] "A140_210512_dcm_1" "A141_210512_dcm_2" "A142_211010_3"
## [46] "A143_210224_1" "A144_210224_3" "A145_210322_2"
## [49] "A146_210322_3" "A147_210419_2" "A148_210419_3"
## [52] "A149_211010_2" "A150_211019_2" "A151_211019_3"
## [55] "A152_201216_1" "A153_201216_2" "A154_210107_3"
## [58] "A155_210127_1" "A156_210127_2" "A157_210210_1"
## [61] "A158_210210_2" "A159_210315_1" "A160_210315_2"
## [64] "A162_201125_1" "A163_201125_2" "A164_201230_1"
## [67] "A165_201230_2" "A166_210107_1" "A167_210324_1"
## [70] "A168_210324_2" "A169_220308_2" "A170_220308_3"
## [73] "A171_211208_1" "A172_211208_2" "A173_211222_1"
## [76] "A174_220104_1" "A175_220104_3" "A176_220214_1"
## [79] "A177_220214_2" "A178_220411_1" "A179_220411_2"
## [82] "A180_211103_1" "A181_211103_3" "A182_211116_2"
## [85] "A183_211116_3" "A184_211222_2" "A185_220118_2"
## [88] "A186_220118_3" "A187_220207_1" "A188_220207_2"
## [91] "A189_220315_2" "A190_220315_3" "A191_220511_srf_1"
## [94] "A192_220511_dcm_1"
Merging the tables to a phyloseq file
mon = phyloseq(OTU,TAX,META)
Next, a cleanup from artifacts in the data
# remove bad esvs
mon = subset_taxa(mon, Kingdom != "Unassigned")
raw_data<-datatable(otutax)
This plot contains only surface samples
Top 10 phyla
Top phytoplankton phyla (Metazoans excluded) plotted in context of chlorophyll levels at each time point (Bar level= chlorophyll, Bar fill= Relative abundance of top phyto phyla)
top 10 families
Main families are Bathycoccaceae and Mamiellaceae. Bathycoccaceae is composed of 2 main genera (Species level is not informative in this case)
Mamiellaceae is dominated by species of Micromonas:
Two Families contain additional information on Genus level: Dictyocystidae and Tintinnidae
The family ‘Cryptomonadales_X’ is informative on genus level:
Most families are not informative in higher taxonomic level, except Ceratiaceae and Gymnodiniaceae.
Within Gymnodiniaceae, there are two main genera
Only genus gyrodinium is informative on the species level
The family Prymnesiaceae is informative on genus level
Note: pr2 database does not show species such as Emiliania huxleyi
Therefore, the data was analysed again with Silva database, for comparison. Silva database contains E.hux identification. Need to assess what database is more appropriate to use.
Phylum: Haptophyta (Silva database)
The Prymnesiales family is informative on genus level:
To inspect if there is not a contamination of human DNA in the samples, zooming in on top 10 families within Metazoa:
Apparently the most dominant family is Maxillopoda (arthropods, crustacea), which is composed of several genera:
top 10 Genus
Code:
obj <- mon1
otutable <- data.frame(OTU = rownames(phyloseq::otu_table(obj)@.Data),
phyloseq::otu_table(obj)@.Data,
phyloseq::tax_table(obj)@.Data,
check.names = FALSE
)
#Extract metadata from the phyloseq object:
metadata <- data.frame(phyloseq::sample_data(obj),
check.names = FALSE
)
#Load the data with amp_load:
##write.csv (otutable, "asv.csv")
##write.csv (metadata, "meta.csv")
myotutable <- read.csv("asv.csv", check.names = FALSE) #remove first column from the asv table
mymetadata <- read.csv("meta.csv", check.names = FALSE) # add sampleID to first column of metadata CSV
forheatmap <- amp_load(myotutable, mymetadata)
## 1088 OTUs with 0 total abundance across all samples have been removed.
Before testing alpha diversity indices, the data (including surface & dcm) was rarefied
rar <- rarefy_even_depth(mon1, sample.size = min(sample_sums(mon)),
rngseed = FALSE, replace = FALSE, trimOTUs = TRUE, verbose = TRUE)
## You set `rngseed` to FALSE. Make sure you've set & recorded
## the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 1062OTUs were removed because they are no longer
## present in any sample after random subsampling
## ...
The 3 indices are:
Rarefaction curves were plotted as well (color-coded by ACE index):
Comparison based on difference between months
data_grp <- read.csv("metadata_18s_srf.csv", header=TRUE, stringsAsFactors = TRUE)
data_otu_filt_rar = t(data.frame(otu_table(mon1)))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = data_otu_filt_rar ~ month, data = data_grp, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## month 1 1.9867 0.09135 8.2437 0.0001 ***
## Residual 82 19.7618 0.90865
## Total 83 21.7485 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ps_norm_df <- as(sample_data(mon1), "data.frame")
ps.dist <- phyloseq::distance(mon1, method="bray")
pa<-pairwise.adonis(ps.dist,ps_norm_df$month)
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 4 vs 3 1 0.9822401 4.689232 0.1049298 0.00100000 0.045 .
## 2 4 vs 5 1 0.8539472 3.684620 0.1241256 0.00100000 0.045 .
## 3 4 vs 6 1 0.7539076 3.266669 0.1198045 0.00100000 0.045 .
## 4 4 vs 8 1 0.8188600 3.557772 0.1291023 0.00500000 0.225
## 5 4 vs 10 1 1.3199165 5.909946 0.1852070 0.00100000 0.045 .
## 6 4 vs 2 1 1.1548582 5.283730 0.1497497 0.00100000 0.045 .
## 7 4 vs 12 1 1.8854915 8.884302 0.2284804 0.00100000 0.045 .
## 8 4 vs 1 1 1.5809122 7.200763 0.1935649 0.00100000 0.045 .
## 9 4 vs 11 1 1.6108865 7.338148 0.2076551 0.00100000 0.045 .
## 10 3 vs 5 1 1.1662375 6.775789 0.2530566 0.00100000 0.045 .
## 11 3 vs 6 1 0.8257629 5.029145 0.2183818 0.00900000 0.405
## 12 3 vs 8 1 0.8914104 5.456742 0.2326300 0.00400000 0.180
## 13 3 vs 10 1 1.2855474 7.976338 0.2851102 0.00100000 0.045 .
## 14 3 vs 2 1 0.5049262 3.049630 0.1127420 0.00200000 0.090
## 15 3 vs 12 1 1.4871142 9.433439 0.2821558 0.00100000 0.045 .
## 16 3 vs 1 1 1.0729710 6.432928 0.2113805 0.00100000 0.045 .
## 17 3 vs 11 1 1.4625183 9.029838 0.2910050 0.00100000 0.045 .
## 18 5 vs 6 1 0.3455834 2.293985 0.3644726 0.13333333 1.000
## 19 5 vs 8 1 0.4583437 3.120412 0.4382347 0.13333333 1.000
## 20 5 vs 10 1 0.5637045 3.885498 0.3930503 0.03200000 1.000
## 21 5 vs 2 1 0.9538350 5.885166 0.3704819 0.00300000 0.135
## 22 5 vs 12 1 0.8861233 6.194498 0.3825063 0.00200000 0.090
## 23 5 vs 1 1 0.9545622 5.784813 0.3664796 0.00300000 0.135
## 24 5 vs 11 1 0.6999594 4.626671 0.3664205 0.00600000 0.270
## 25 6 vs 8 1 0.3053544 6.064975 0.7520141 0.33333333 1.000
## 26 6 vs 10 1 0.2693405 2.808363 0.4124873 0.06666667 1.000
## 27 6 vs 2 1 0.6735032 4.751780 0.3726366 0.02000000 0.900
## 28 6 vs 12 1 0.6054643 5.132943 0.3908448 0.03500000 1.000
## 29 6 vs 1 1 0.6953242 4.781861 0.3741131 0.01900000 0.855
## 30 6 vs 11 1 0.4080339 3.384042 0.3606167 0.03400000 1.000
## 31 8 vs 10 1 0.3556867 3.860096 0.4911004 0.06666667 1.000
## 32 8 vs 2 1 0.7426825 5.310337 0.3989634 0.02300000 1.000
## 33 8 vs 12 1 0.5607226 4.830669 0.3764940 0.03600000 1.000
## 34 8 vs 1 1 0.7163020 4.990689 0.3841743 0.03000000 1.000
## 35 8 vs 11 1 0.4239505 3.590737 0.3743963 0.03400000 1.000
## 36 10 vs 2 1 0.9389777 6.698467 0.4011426 0.00400000 0.180
## 37 10 vs 12 1 0.7976708 6.583961 0.3970078 0.00300000 0.135
## 38 10 vs 1 1 0.9067406 6.335733 0.3878451 0.00400000 0.180
## 39 10 vs 11 1 0.4080452 3.292882 0.2915891 0.00900000 0.405
## 40 2 vs 12 1 0.6126791 4.372250 0.2379812 0.00200000 0.090
## 41 2 vs 1 1 0.3505807 2.249967 0.1384598 0.01200000 0.540
## 42 2 vs 11 1 0.8132083 5.603152 0.3183039 0.00100000 0.045 .
## 43 12 vs 1 1 0.3256118 2.289379 0.1405443 0.00300000 0.135
## 44 12 vs 11 1 0.4391741 3.397065 0.2206307 0.00100000 0.045 .
## 45 1 vs 11 1 0.7321084 4.960694 0.2924818 0.00100000 0.045 .
Differential expression of abundance (Family level) between the years of sampling
physeq <- taxa_level(mon1,"Family")
NB_sig <- differential_abundance(physeq, grouping_column = "year",output_norm="log-relative",pvalue.threshold=0.05,lfc.threshold=0, filename = "1.csv")
p<-plot_signif(NB_sig$plotdata, top.taxa=30)
p <- plot_MA(NB_sig$SignFeaturesTable, label=TRUE)
The code for phyloseq ordination
sems.ord <- ordinate(mon1, "NMDS", "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.09653153
## Run 1 stress 0.09653153
## ... Procrustes: rmse 0.00001742751 max resid 0.00006475532
## ... Similar to previous best
## Run 2 stress 0.1407628
## Run 3 stress 0.1270118
## Run 4 stress 0.1285715
## Run 5 stress 0.1137767
## Run 6 stress 0.1313194
## Run 7 stress 0.1364165
## Run 8 stress 0.1253753
## Run 9 stress 0.1364165
## Run 10 stress 0.09653153
## ... New best solution
## ... Procrustes: rmse 0.000004076763 max resid 0.00001908302
## ... Similar to previous best
## Run 11 stress 0.1238915
## Run 12 stress 0.09653153
## ... Procrustes: rmse 0.000001142074 max resid 0.000005528667
## ... Similar to previous best
## Run 13 stress 0.1382325
## Run 14 stress 0.09653153
## ... Procrustes: rmse 0.000008312389 max resid 0.00002685398
## ... Similar to previous best
## Run 15 stress 0.09653153
## ... New best solution
## ... Procrustes: rmse 0.000007371869 max resid 0.00002529074
## ... Similar to previous best
## Run 16 stress 0.1403878
## Run 17 stress 0.1273168
## Run 18 stress 0.09653153
## ... Procrustes: rmse 0.000007315178 max resid 0.0000243654
## ... Similar to previous best
## Run 19 stress 0.1402953
## Run 20 stress 0.1382556
## *** Solution reached
## p1 = plot_ordination(mon1, sems.ord, type="samples", color="shannon") <-The code might not work. One solution is to start the session again, and work only with the relevant packages to run this code.
Samples are color-coded by Shannon index