library(knitr)
library(readxl)
#detach(package:plyr)
library(plyr)
library(dplyr)
library(ggplot2)
library(reshape2)
library(VennDiagram)
library(vegan)
set.seed(100)
path <- "/Users/kylielanglois/OneDrive - SCCWRP/eDNA field/"
#path to parent folder
met <- read.csv(file.path(path, "BIO_eDNA_field_metadata.csv"))
met$samp.unique<-paste0(met$Project,"_", met$Site, "_R", met$Replicate)
#combined metadata
cry.bact<-read.csv(file.path(path, "BIO_eDNA_012925/eDNA_CTAB_16S_012925_merger_8_table_silva_tax.csv"))
cry.fish<-read.csv(file.path(path, "BIO_eDNA_012925/eDNA_CTAB_12S_012925_merger_12_table_MareMage_bl95_tax.csv"))
cat.bact<-read.csv(file.path(path, "BIO_eDNA_032425/BIO_eDNA_032425_16S_dada2_merger8_table_silva.csv"))
cat.fish<-read.csv(file.path(path, "BIO_eDNA_032425/BIO_eDNA_032425_12S_dada2_merger10_table_MareMage.csv"))
spot.bact<-cbind(cat.bact[, colnames(cat.bact)[lapply(cat.bact, is.numeric)!=T]],
cat.bact[, grepl("MagExt", colnames(cat.bact))])
cat.bact<-cat.bact[, !grepl("MagExt", colnames(cat.bact))]
spot.fish<-cbind(cat.fish[, colnames(cat.fish)[lapply(cat.fish, is.numeric)!=T]],
cat.fish[, grepl("MagExt", colnames(cat.fish))])
cat.fish<-cat.fish[, !grepl("MagExt", colnames(cat.fish))]
combo.12<-read.csv(file.path(path, "BIO_eDNA_field_12S_combo_merger_table.csv"))
combo.16<-read.csv(file.path(path, "BIO_eDNA_field_16S_combo_merger_table.csv"))
dat.temp<-cat.fish #choose which projects
dat.temp$asv.sum<-rowSums(dat.temp[lapply(dat.temp, is.numeric)==T])
dat.temp<-subset(dat.temp, dat.temp$asv.sum>0)
num.col<-colnames(dat.temp)[lapply(dat.temp, is.numeric)==T]
#get only numeric columns
sampsums<-colSums(dat.temp[ , match (num.col, colnames(dat.temp))])
#get sum of only numeric columns
#sum(sampsums)
dat.n<-combo.12 #choose combined data table
rownames(dat.n)<-dat.n[, 1]
dat.n<-dat.n[,-1]
dat.n.t<-as.data.frame(t(dat.n))
sampsums.df<-data.frame(samps=colnames(dat.n),
sum=colSums(dat.n))
meta<-metaMDS(dat.n.t, try = 100)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1088287
## Run 1 stress 0.1086262
## ... New best solution
## ... Procrustes: rmse 0.01535361 max resid 0.05818655
## Run 2 stress 0.1131641
## Run 3 stress 0.1122954
## Run 4 stress 0.108896
## ... Procrustes: rmse 0.02097511 max resid 0.1081012
## Run 5 stress 0.1090019
## ... Procrustes: rmse 0.0215831 max resid 0.1084772
## Run 6 stress 0.1117562
## Run 7 stress 0.1179826
## Run 8 stress 0.1117798
## Run 9 stress 0.1114692
## Run 10 stress 0.1114104
## Run 11 stress 0.1173751
## Run 12 stress 0.1123201
## Run 13 stress 0.1225234
## Run 14 stress 0.1132656
## Run 15 stress 0.1096947
## Run 16 stress 0.1133206
## Run 17 stress 0.1096355
## Run 18 stress 0.1218209
## Run 19 stress 0.109418
## Run 20 stress 0.1087532
## ... Procrustes: rmse 0.02350904 max resid 0.1087563
## *** No convergence -- monoMDS stopping criteria:
## 19: stress ratio > sratmax
## 1: scale factor of the gradient < sfgrmin
coords<-as.data.frame(meta$points)
coords$samps<-rownames(coords)
coords<-merge(coords, met, by.x="samps", by.y="Sample.Name", all.x = T)
coords<-merge(sampsums.df, coords, by="samps", all.y = T)
coords$Filter.stage<-as.character(coords$Filter.stage)
coords$Sequencing.Run<-as.character(coords$Sequencing.Run)
gc<-
ggplot(coords)+
geom_point(aes(x=MDS1, y=MDS2,
color=Filter.stage, shape=Project,
size=sum),
stroke=1, alpha=0.75)+
scale_size_continuous(range=c(0,10))+
scale_shape_manual(values=c(16, 8, 6))+ #for projects
# scale_shape_manual(values=c(16, 13, 1))+ #for depths
ggtitle("eDNA field summer 2024 12S (MiFish-Umod)")
# ggtitle("eDNA field summer 2024 16S (EMP)")
gc

#ggsave(plot = gc,
# filename = "BIO_eDNA_field_12S_combo_NMDS_sizeseq.png",
# path = path, height = 6, width = 8)
dist.df <- as.matrix(dist(dat.n.t, diag = F, upper = T))
dist.df.m <- melt(dist.df, varnames=c("samp1", "samp2"))
dist.df.m$match <- dist.df.m$samp1==dist.df.m$samp2
dist.df.m<-dist.df.m[grepl("FALSE", dist.df.m$match), ]
samp.count <- met %>%
group_by(Project, Assay) %>%
summarise(total.samps=length(Sample.Name))
dat1.perc<-sweep(dat.temp[ , match(num.col, colnames(dat.temp))],
2, sampsums, `/`)
#make proportional for only numeric columns
dat1.perc$ASV<-dat.temp$Feature.ID
#add ASV column back
colSums(dat1.perc[ , match (num.col, colnames(dat1.perc))])
## CGASH241028.12S_Site_1A_CTAB_Dupo2 CGASH241028.12S_Site_1A_Dupo1
## 1 1
## CGASH241028.12S_Site_1A.W_Dupo1 CGASH241028.12S_Site_1B_CTAB_Dupo2
## 1 1
## CGASH241028.12S_Site_1B_Dupo1 CGASH241028.12S_Site_1B.W_Dupo1
## 1 1
## CGASH241028.12S_Site_1C_CTAB_Dupo2 CGASH241028.12S_Site_1C_Dupo1
## 1 1
## CGASH241028.12S_Site_1C.W_Dupo1 asv.sum
## 1 1
#check all are 1
dat1.perc.m<-melt(dat1.perc)
dat1.perc.m$num<-1:nrow(dat1.perc.m)
dat1.perc.m<-merge(dat1.perc.m, dat.temp[ , -match(num.col,
colnames(dat.temp))],
by.x = "ASV", by.y="Feature.ID")
#merge with taxonomy info, remove duplicate "num" rows if needed
tax.col<-"Taxon8"
#Taxon8 for fish genus
#Taxon6 for bact genus
dat.temp$tax.consol<-ifelse(is.na(dat.temp$Taxon8), dat.temp$last_tax,
dat.temp$Taxon8)
#if no ID at desired tax, make it last known taxonomy ID
dat.temp.tax<-dat.temp[, match(c("tax.consol", num.col), colnames(dat.temp))]
dat.temp.tax<-dat.temp.tax[!grepl("asv", colnames(dat.temp.tax))]
dat.temp.tax.consol<-ddply(dat.temp.tax, "tax.consol", numcolwise(sum))
dat.v <- dat.temp #if comparing ASV
#dat.v <- dat.temp.tax.consol #if comparing higher tax
stage.1<-dat.v[, grepl("Dupo1|Feature", colnames(dat.v))]
#use "Feature" for ASV, use "tax" for genus
stage.1<-stage.1[, !grepl("\\.W", colnames(stage.1))]
#pick ID for 1st stage, remove any not relevant samples
#CRY: "Top", remove "Top4"
#CAT: "Dupo1", remove "\\.W"
#SPOT: "_1", remove "_S|DCM|BW" as needed
stage.1$sums<-rowSums(stage.1[2:ncol(stage.1)])
stage.1<-subset(stage.1, stage.1$sums>0)
stage.1.tax<-unique(stage.1[, 1])
stage.2<-dat.v[, grepl("Dupo2|Feature", colnames(dat.v))]
#use "Feature" for ASV, use "tax" for genus
stage.2<-stage.2[, !grepl("\\.W", colnames(stage.2))]
#pick ID for 2nd stage, remove any not relevant samples
#CRY: "CTAB", remove "CRY4"
#CAT: "Dupo2", remove "\\.W"
#SPOT: "_2nd", remove "_S|DCM|BW" as needed
stage.2$sums<-rowSums(stage.2[2:ncol(stage.2)])
stage.2<-subset(stage.2, stage.2$sums>0)
stage.2.tax<-unique(stage.2[, 1])
paste("unique for 1st stage: ", length(stage.1.tax)-sum(stage.1.tax %in% stage.2.tax))
## [1] "unique for 1st stage: 352"
paste("unique for 2nd stage: ", length(stage.2.tax)-sum(stage.2.tax %in% stage.1.tax))
## [1] "unique for 2nd stage: 112"
paste("shared: ", sum(stage.1.tax %in% stage.2.tax))
## [1] "shared: 20"