Samples were sequenced and prepared beforehand with Qiime2. This project begins after obtaining 3 files for phyloseq analysis:
###sample data
meta16s=read.csv("metadata_16s.csv",sep=",",row.names=1) ### metadata
tax16s=read.csv("taxonomy_16s.csv",sep=",",row.names=1) ### taxonomy
asv16s=read.csv("otu_table_16s.csv",sep=",",row.names=1) ### OTU table (named asv due to code syntax)
### preparing to phyloseq format
tax16s=as.matrix(tax16s)
asv16s=data.matrix(asv16s)
### Obtaining 3 files to merge
OTU=otu_table(asv16s,taxa_are_rows=TRUE)
TAX=tax_table(tax16s)
META=sample_data(meta16s)
Testing that the tables are in the same length
tax_names<-taxa_names(TAX)
otu_names<-taxa_names(OTU) ### These lists are not shown due to their length (over 8000 values)
sample_names(OTU)
## [1] "A001_220406_3" "A002_220419_2" "A003_220407_2"
## [4] "A004_220321_3" "A005_220405_2" "A006_220403_3"
## [7] "A007_220328_2" "A008_220511_srf_2" "A009_220404_2"
## [10] "A010_220407_3" "A011_220404_3" "A012_220405_3"
## [13] "A013_220406_2" "A014_220321_1" "A015_220419_3"
## [16] "A016_220403_2" "A017_220328_1" "A018_220511_dcm_2"
## [19] "A019_210331_1" "A020_210422_2" "A021_210422_1"
## [22] "A022_210310_1" "A023_210310_2" "A024_210404_1"
## [25] "A025_210404_3" "A026_210414_2" "A027_210414_3"
## [28] "A029_210331_2" "A030_210620_srf_1" "A031_210620_srf_2"
## [31] "A032_210620_dcm_1" "A033_210620_dcm_2" "A034_210817_srf_1"
## [34] "A035_210817_srf_2" "A036_210817_dcm_1" "A037_210817_dcm_2"
## [37] "A038_210427_srf_1" "A039_210427_srf_2" "A040_210427_dcm_1"
## [40] "A041_210427_dcm_2" "A042_210512_srf_1" "A043_210512_srf_2"
## [43] "A044_210512_dcm_1" "A045_210512_dcm_2" "A046_211010_3"
## [46] "A047_210224_1" "A048_210224_3" "A049_210322_2"
## [49] "A050_210322_3" "A051_210419_2" "A052_210419_3"
## [52] "A053_211010_2" "A054_211019_2" "A055_211019_3"
## [55] "A056_201216_1" "A057_201216_2" "A058_210107_3"
## [58] "A059_210127_1" "A060_210127_2" "A061_210210_1"
## [61] "A062_210210_2" "A063_210315_1" "A064_210315_2"
## [64] "A066_201125_1" "A067_201125_2" "A068_201230_1"
## [67] "A069_201230_2" "A070_210107_1" "A071_210324_1"
## [70] "A072_210324_2" "A073_220308_2" "A074_220308_3"
## [73] "A075_211208_1" "A076_211208_2" "A077_211222_1"
## [76] "A078_220104_1" "A079_220104_3" "A080_220214_1"
## [79] "A081_220214_2" "A082_220411_1" "A083_220411_2"
## [82] "A084_211103_1" "A085_211103_3" "A086_211116_2"
## [85] "A087_211116_3" "A088_211222_2" "A089_220118_2"
## [88] "A090_220118_3" "A091_220207_1" "A092_220207_2"
## [91] "A093_220315_2" "A094_220315_3" "A095_220511_srf_1"
## [94] "A096_220511_dcm_1"
sample_names(META)
## [1] "A001_220406_3" "A002_220419_2" "A003_220407_2"
## [4] "A004_220321_3" "A005_220405_2" "A006_220403_3"
## [7] "A007_220328_2" "A008_220511_srf_2" "A009_220404_2"
## [10] "A010_220407_3" "A011_220404_3" "A012_220405_3"
## [13] "A013_220406_2" "A014_220321_1" "A015_220419_3"
## [16] "A016_220403_2" "A017_220328_1" "A018_220511_dcm_2"
## [19] "A019_210331_1" "A020_210422_2" "A021_210422_1"
## [22] "A022_210310_1" "A023_210310_2" "A024_210404_1"
## [25] "A025_210404_3" "A026_210414_2" "A027_210414_3"
## [28] "A029_210331_2" "A030_210620_srf_1" "A031_210620_srf_2"
## [31] "A032_210620_dcm_1" "A033_210620_dcm_2" "A034_210817_srf_1"
## [34] "A035_210817_srf_2" "A036_210817_dcm_1" "A037_210817_dcm_2"
## [37] "A038_210427_srf_1" "A039_210427_srf_2" "A040_210427_dcm_1"
## [40] "A041_210427_dcm_2" "A042_210512_srf_1" "A043_210512_srf_2"
## [43] "A044_210512_dcm_1" "A045_210512_dcm_2" "A046_211010_3"
## [46] "A047_210224_1" "A048_210224_3" "A049_210322_2"
## [49] "A050_210322_3" "A051_210419_2" "A052_210419_3"
## [52] "A053_211010_2" "A054_211019_2" "A055_211019_3"
## [55] "A056_201216_1" "A057_201216_2" "A058_210107_3"
## [58] "A059_210127_1" "A060_210127_2" "A061_210210_1"
## [61] "A062_210210_2" "A063_210315_1" "A064_210315_2"
## [64] "A066_201125_1" "A067_201125_2" "A068_201230_1"
## [67] "A069_201230_2" "A070_210107_1" "A071_210324_1"
## [70] "A072_210324_2" "A073_220308_2" "A074_220308_3"
## [73] "A075_211208_1" "A076_211208_2" "A077_211222_1"
## [76] "A078_220104_1" "A079_220104_3" "A080_220214_1"
## [79] "A081_220214_2" "A082_220411_1" "A083_220411_2"
## [82] "A084_211103_1" "A085_211103_3" "A086_211116_2"
## [85] "A087_211116_3" "A088_211222_2" "A089_220118_2"
## [88] "A090_220118_3" "A091_220207_1" "A092_220207_2"
## [91] "A093_220315_2" "A094_220315_3" "A095_220511_srf_1"
## [94] "A096_220511_dcm_1"
Merging the tables to a phyloseq file
mon = phyloseq(OTU,TAX,META)
Next, a cleanup from artifacts in the data
# remove bad esvs
mon= subset_taxa(mon, Kingdom != "d__Eukaryota")
mon = subset_taxa(mon, Kingdom != "Unassigned")
mon = subset_taxa(mon, Order != "o__Chloroplast")
mon = subset_taxa(mon, Family!= "f__Mitochondria")
raw_data<-datatable(otutax)
This plot contains only surface samples
Top 10 phyla
(of phylum cyanobacteria)
(of phylum proteobacteria)
Code:
obj <- mon1
otutable <- data.frame(OTU = rownames(phyloseq::otu_table(obj)@.Data),
phyloseq::otu_table(obj)@.Data,
phyloseq::tax_table(obj)@.Data,
check.names = FALSE
)
#Extract metadata from the phyloseq object:
metadata <- data.frame(phyloseq::sample_data(obj),
check.names = FALSE
)
#Load the data with amp_load:
##write.csv (otutable, "asv.csv")
##write.csv (metadata, "meta.csv")
myotutable <- read.csv("asv.csv", check.names = FALSE) #remove first column from the asv table
mymetadata <- read.csv("meta.csv", check.names = FALSE) # add sampleID to first column of metadata CSV
forheatmap <- amp_load(myotutable, mymetadata)
## 484 OTUs with 0 total abundance across all samples have been removed.
Create datasets for bacteria and archaea:
arch <- amp_subset_taxa(forheatmap, tax_vector = "d__Archaea", normalise = TRUE, remove = FALSE)
## 5546 OTUs have been filtered
## Before: 5768 OTUs
## After: 222 OTUs
bac <- amp_subset_taxa(forheatmap, tax_vector = "d__Bacteria", normalise = TRUE, remove = FALSE)
## 222 OTUs have been filtered
## Before: 5768 OTUs
## After: 5546 OTUs
Heatmaps are divided by months, each month contain 2 years of sampling, to detect differences in abundance. The 2 heatmaps shows:
Before testing alpha diversity indices, the data (including surface & dcm) was rarefied
rar <- rarefy_even_depth(mon1, sample.size = min(sample_sums(mon)),
rngseed = FALSE, replace = FALSE, trimOTUs = TRUE, verbose = TRUE)
## You set `rngseed` to FALSE. Make sure you've set & recorded
## the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 1311OTUs were removed because they are no longer
## present in any sample after random subsampling
## ...
The 3 indices are:
Rarefaction curves were plotted as well (color-coded by ACE index):
Comparison based on difference between months
data_grp <- read.csv("metadata_16s_srf.csv", header=TRUE, stringsAsFactors = TRUE)
data_otu_filt_rar = t(data.frame(otu_table(mon1)))
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = data_otu_filt_rar ~ month, data = data_grp, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## month 1 1.5520 0.15318 14.832 0.0001 ***
## Residual 82 8.5803 0.84682
## Total 83 10.1323 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ps_norm_df <- as(sample_data(mon1), "data.frame")
ps.dist <- phyloseq::distance(mon1, method="bray")
pa<-pairwise.adonis(ps.dist,ps_norm_df$month)
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 4 vs 3 1 0.66465277 7.204914 0.15263060 0.00100000 0.045 .
## 2 4 vs 5 1 0.67324322 6.016810 0.18792660 0.00100000 0.045 .
## 3 4 vs 6 1 0.56029318 4.820213 0.16725112 0.00100000 0.045 .
## 4 4 vs 8 1 0.71195300 6.124614 0.20330928 0.00500000 0.225
## 5 4 vs 10 1 0.96298399 8.922327 0.25549062 0.00100000 0.045 .
## 6 4 vs 2 1 0.53819969 5.304325 0.15024575 0.00100000 0.045 .
## 7 4 vs 12 1 0.80801639 7.891149 0.20825838 0.00100000 0.045 .
## 8 4 vs 1 1 0.68477767 6.674753 0.18199859 0.00100000 0.045 .
## 9 4 vs 11 1 1.03749587 9.684885 0.25699653 0.00100000 0.045 .
## 10 3 vs 5 1 0.89407493 16.963204 0.45892136 0.00100000 0.045 .
## 11 3 vs 6 1 0.60843266 11.717950 0.39430546 0.00300000 0.135
## 12 3 vs 8 1 0.73350722 14.124508 0.43968014 0.00700000 0.315
## 13 3 vs 10 1 0.89609410 18.843916 0.48511885 0.00100000 0.045 .
## 14 3 vs 2 1 0.09490400 1.915918 0.07392822 0.09000000 1.000
## 15 3 vs 12 1 0.62362963 12.300830 0.33885809 0.00100000 0.045 .
## 16 3 vs 1 1 0.30164277 5.921026 0.19788846 0.00100000 0.045 .
## 17 3 vs 11 1 0.94851564 18.234221 0.45320179 0.00100000 0.045 .
## 18 5 vs 6 1 0.16164213 4.202427 0.51233943 0.13333333 1.000
## 19 5 vs 8 1 0.26751024 6.948002 0.63463653 0.06666667 1.000
## 20 5 vs 10 1 0.33282783 11.725514 0.66150489 0.02900000 1.000
## 21 5 vs 2 1 0.72079111 17.663538 0.63851334 0.00400000 0.180
## 22 5 vs 12 1 0.47590567 10.915372 0.52188277 0.00500000 0.225
## 23 5 vs 1 1 0.62822789 14.216393 0.58705659 0.00300000 0.135
## 24 5 vs 11 1 0.35944389 7.907574 0.49709490 0.00200000 0.090
## 25 6 vs 8 1 0.10111454 5.863602 0.74566364 0.33333333 1.000
## 26 6 vs 10 1 0.11690390 9.206587 0.69712083 0.06666667 1.000
## 27 6 vs 2 1 0.54429724 15.090596 0.65353861 0.03500000 1.000
## 28 6 vs 12 1 0.35946524 9.086650 0.53179821 0.02700000 1.000
## 29 6 vs 1 1 0.47710766 11.839424 0.59676249 0.02700000 1.000
## 30 6 vs 11 1 0.17814787 4.378405 0.42187650 0.03800000 1.000
## 31 8 vs 10 1 0.11174113 8.773915 0.68686186 0.06666667 1.000
## 32 8 vs 2 1 0.64216794 17.794740 0.68985925 0.02100000 0.945
## 33 8 vs 12 1 0.37520067 9.479890 0.54233122 0.01900000 0.855
## 34 8 vs 1 1 0.53734722 13.328026 0.62490668 0.02100000 0.945
## 35 8 vs 11 1 0.17246322 4.236071 0.41383761 0.02900000 1.000
## 36 10 vs 2 1 0.70746147 23.195257 0.69875214 0.00200000 0.090
## 37 10 vs 12 1 0.35569963 10.683877 0.51653164 0.00300000 0.135
## 38 10 vs 1 1 0.56422678 16.651753 0.62479016 0.00600000 0.270
## 39 10 vs 11 1 0.09449954 2.901199 0.26613580 0.01700000 0.765
## 40 2 vs 12 1 0.34544539 8.474381 0.37706850 0.00100000 0.045 .
## 41 2 vs 1 1 0.09345006 2.269004 0.13946792 0.04900000 1.000
## 42 2 vs 11 1 0.65814379 15.848121 0.56909121 0.00100000 0.045 .
## 43 12 vs 1 1 0.14368415 3.327532 0.19203728 0.00800000 0.360
## 44 12 vs 11 1 0.20257691 4.619183 0.27794284 0.00300000 0.135
## 45 1 vs 11 1 0.45433985 10.244899 0.46055049 0.00100000 0.045 .
Differential expression of abundance (Family level) between the years of sampling
physeq <- taxa_level(mon1,"Family")
NB_sig <- differential_abundance(physeq, grouping_column = "year",output_norm="log-relative",pvalue.threshold=0.05,lfc.threshold=0, filename = "1.csv")
p<-plot_signif(NB_sig$plotdata, top.taxa=30)
p <- plot_MA(NB_sig$SignFeaturesTable, label=TRUE)
The code for phyloseq ordination
sems.ord <- ordinate(mon1, "NMDS", "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.09274357
## Run 1 stress 0.1017237
## Run 2 stress 0.1444514
## Run 3 stress 0.09274357
## ... New best solution
## ... Procrustes: rmse 0.000004397437 max resid 0.00002287361
## ... Similar to previous best
## Run 4 stress 0.09274357
## ... New best solution
## ... Procrustes: rmse 0.00001206128 max resid 0.00008189235
## ... Similar to previous best
## Run 5 stress 0.1017237
## Run 6 stress 0.09274357
## ... Procrustes: rmse 0.00000875416 max resid 0.00005264231
## ... Similar to previous best
## Run 7 stress 0.133378
## Run 8 stress 0.09274357
## ... New best solution
## ... Procrustes: rmse 0.000005337812 max resid 0.0000252863
## ... Similar to previous best
## Run 9 stress 0.1417975
## Run 10 stress 0.09582354
## Run 11 stress 0.1594529
## Run 12 stress 0.09274357
## ... Procrustes: rmse 0.000006934365 max resid 0.00005192101
## ... Similar to previous best
## Run 13 stress 0.1195385
## Run 14 stress 0.1159907
## Run 15 stress 0.09582354
## Run 16 stress 0.1195385
## Run 17 stress 0.09582354
## Run 18 stress 0.171139
## Run 19 stress 0.09582354
## Run 20 stress 0.09274357
## ... Procrustes: rmse 0.000003443438 max resid 0.00001955893
## ... Similar to previous best
## *** Solution reached
## p1 = plot_ordination(mon1, sems.ord, type="samples", color="shannon") <-The code might not work. One solution is to start the session again, and work only with the relevant packages to run this code.
Samples are color-coded by Shannon index