library(knitr)
library(readxl)
#detach(package:plyr)
library(plyr)
library(dplyr)
library(ggplot2)
library(reshape2)
library(VennDiagram)
library(vegan)
set.seed(100)
path <- "/Users/kylielanglois/OneDrive - SCCWRP/eDNA field/"
#path to parent folder

met <- read.csv(file.path(path, "BIO_eDNA_field_metadata.csv"))
met$samp.unique<-paste0(met$Project,"_", met$Site, "_R", met$Replicate)
#combined metadata

cry.bact<-read.csv(file.path(path, "BIO_eDNA_012925/eDNA_CTAB_16S_012925_merger_8_table_silva_tax.csv"))
cry.fish<-read.csv(file.path(path, "BIO_eDNA_012925/eDNA_CTAB_12S_012925_merger_12_table_MareMage_bl95_tax.csv"))

cat.bact<-read.csv(file.path(path, "BIO_eDNA_032425/BIO_eDNA_032425_16S_dada2_merger8_table_silva.csv"))
cat.fish<-read.csv(file.path(path, "BIO_eDNA_032425/BIO_eDNA_032425_12S_dada2_merger10_table_MareMage.csv"))

spot.bact<-cbind(cat.bact[, colnames(cat.bact)[lapply(cat.bact, is.numeric)!=T]], 
                cat.bact[, grepl("MagExt", colnames(cat.bact))])
cat.bact<-cat.bact[, !grepl("MagExt", colnames(cat.bact))]

spot.fish<-cbind(cat.fish[, colnames(cat.fish)[lapply(cat.fish, is.numeric)!=T]], 
                cat.fish[, grepl("MagExt", colnames(cat.fish))])
cat.fish<-cat.fish[, !grepl("MagExt", colnames(cat.fish))]

combo.12<-read.csv(file.path(path, "BIO_eDNA_field_12S_combo_merger_table.csv"))
combo.16<-read.csv(file.path(path, "BIO_eDNA_field_16S_combo_merger_table.csv"))
dat.temp<-cat.fish #choose which projects

dat.temp$asv.sum<-rowSums(dat.temp[lapply(dat.temp, is.numeric)==T])
dat.temp<-subset(dat.temp, dat.temp$asv.sum>0)

num.col<-colnames(dat.temp)[lapply(dat.temp, is.numeric)==T] 
#get only numeric columns
sampsums<-colSums(dat.temp[ , match (num.col, colnames(dat.temp))])
#get sum of only numeric columns
#sum(sampsums)
dat.n<-combo.12 #choose combined data table

rownames(dat.n)<-dat.n[, 1]
dat.n<-dat.n[,-1]
dat.n.t<-as.data.frame(t(dat.n))
sampsums.df<-data.frame(samps=colnames(dat.n), 
                        sum=colSums(dat.n))

meta<-metaMDS(dat.n.t, try = 100)
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1088287 
## Run 1 stress 0.1086262 
## ... New best solution
## ... Procrustes: rmse 0.01535361  max resid 0.05818655 
## Run 2 stress 0.1131641 
## Run 3 stress 0.1122954 
## Run 4 stress 0.108896 
## ... Procrustes: rmse 0.02097511  max resid 0.1081012 
## Run 5 stress 0.1090019 
## ... Procrustes: rmse 0.0215831  max resid 0.1084772 
## Run 6 stress 0.1117562 
## Run 7 stress 0.1179826 
## Run 8 stress 0.1117798 
## Run 9 stress 0.1114692 
## Run 10 stress 0.1114104 
## Run 11 stress 0.1173751 
## Run 12 stress 0.1123201 
## Run 13 stress 0.1225234 
## Run 14 stress 0.1132656 
## Run 15 stress 0.1096947 
## Run 16 stress 0.1133206 
## Run 17 stress 0.1096355 
## Run 18 stress 0.1218209 
## Run 19 stress 0.109418 
## Run 20 stress 0.1087532 
## ... Procrustes: rmse 0.02350904  max resid 0.1087563 
## *** No convergence -- monoMDS stopping criteria:
##     19: stress ratio > sratmax
##      1: scale factor of the gradient < sfgrmin
coords<-as.data.frame(meta$points)
coords$samps<-rownames(coords)
coords<-merge(coords, met, by.x="samps", by.y="Sample.Name", all.x = T)
coords<-merge(sampsums.df, coords, by="samps", all.y = T)
coords$Filter.stage<-as.character(coords$Filter.stage)
coords$Sequencing.Run<-as.character(coords$Sequencing.Run)
gc<-
  ggplot(coords)+
  geom_point(aes(x=MDS1, y=MDS2, 
                 color=Filter.stage, shape=Project, 
                 size=sum), 
             stroke=1, alpha=0.75)+
  scale_size_continuous(range=c(0,10))+
  scale_shape_manual(values=c(16, 8, 6))+ #for projects
#  scale_shape_manual(values=c(16, 13, 1))+ #for depths
  ggtitle("eDNA field summer 2024 12S (MiFish-Umod)")
#  ggtitle("eDNA field summer 2024 16S (EMP)")
gc

#ggsave(plot = gc,
#       filename = "BIO_eDNA_field_12S_combo_NMDS_sizeseq.png", 
#       path = path, height = 6, width = 8)
dist.df <- as.matrix(dist(dat.n.t, diag = F, upper = T))
dist.df.m <- melt(dist.df, varnames=c("samp1", "samp2"))
dist.df.m$match <- dist.df.m$samp1==dist.df.m$samp2
dist.df.m<-dist.df.m[grepl("FALSE", dist.df.m$match), ]
samp.count <- met %>% 
  group_by(Project, Assay) %>%  
  summarise(total.samps=length(Sample.Name))

dat1.perc<-sweep(dat.temp[ , match(num.col, colnames(dat.temp))], 
                                   2, sampsums, `/`)
#make proportional for only numeric columns
dat1.perc$ASV<-dat.temp$Feature.ID
#add ASV column back
colSums(dat1.perc[ , match (num.col, colnames(dat1.perc))]) 
## CGASH241028.12S_Site_1A_CTAB_Dupo2      CGASH241028.12S_Site_1A_Dupo1 
##                                  1                                  1 
##    CGASH241028.12S_Site_1A.W_Dupo1 CGASH241028.12S_Site_1B_CTAB_Dupo2 
##                                  1                                  1 
##      CGASH241028.12S_Site_1B_Dupo1    CGASH241028.12S_Site_1B.W_Dupo1 
##                                  1                                  1 
## CGASH241028.12S_Site_1C_CTAB_Dupo2      CGASH241028.12S_Site_1C_Dupo1 
##                                  1                                  1 
##    CGASH241028.12S_Site_1C.W_Dupo1                            asv.sum 
##                                  1                                  1
#check all are 1

dat1.perc.m<-melt(dat1.perc)
dat1.perc.m$num<-1:nrow(dat1.perc.m)
dat1.perc.m<-merge(dat1.perc.m, dat.temp[ , -match(num.col, 
                                                   colnames(dat.temp))], 
                 by.x = "ASV", by.y="Feature.ID")
#merge with taxonomy info, remove duplicate "num" rows if needed
tax.col<-"Taxon8" 
#Taxon8 for fish genus
#Taxon6 for bact genus

dat.temp$tax.consol<-ifelse(is.na(dat.temp$Taxon8), dat.temp$last_tax, 
                            dat.temp$Taxon8) 
#if no ID at desired tax, make it last known taxonomy ID
dat.temp.tax<-dat.temp[, match(c("tax.consol", num.col), colnames(dat.temp))]
dat.temp.tax<-dat.temp.tax[!grepl("asv", colnames(dat.temp.tax))]

dat.temp.tax.consol<-ddply(dat.temp.tax, "tax.consol", numcolwise(sum))
dat.v <- dat.temp #if comparing ASV
#dat.v <- dat.temp.tax.consol #if comparing higher tax

stage.1<-dat.v[, grepl("Dupo1|Feature", colnames(dat.v))] 
#use "Feature" for ASV, use "tax" for genus
stage.1<-stage.1[, !grepl("\\.W", colnames(stage.1))]
#pick ID for 1st stage, remove any not relevant samples
#CRY: "Top", remove "Top4"
#CAT: "Dupo1", remove "\\.W"
#SPOT: "_1", remove "_S|DCM|BW" as needed
stage.1$sums<-rowSums(stage.1[2:ncol(stage.1)])
stage.1<-subset(stage.1, stage.1$sums>0)
stage.1.tax<-unique(stage.1[, 1])

stage.2<-dat.v[, grepl("Dupo2|Feature", colnames(dat.v))]
#use "Feature" for ASV, use "tax" for genus
stage.2<-stage.2[, !grepl("\\.W", colnames(stage.2))]

#pick ID for 2nd stage, remove any not relevant samples
#CRY: "CTAB", remove "CRY4"
#CAT: "Dupo2", remove "\\.W"
#SPOT: "_2nd", remove "_S|DCM|BW" as needed
stage.2$sums<-rowSums(stage.2[2:ncol(stage.2)])
stage.2<-subset(stage.2, stage.2$sums>0)
stage.2.tax<-unique(stage.2[, 1])

paste("unique for 1st stage: ", length(stage.1.tax)-sum(stage.1.tax %in% stage.2.tax))
## [1] "unique for 1st stage:  352"
paste("unique for 2nd stage: ", length(stage.2.tax)-sum(stage.2.tax %in% stage.1.tax))
## [1] "unique for 2nd stage:  112"
paste("shared: ", sum(stage.1.tax %in% stage.2.tax))
## [1] "shared:  20"