Southern Ocean: eDNA vs CPR

1 eDNA

1.1 Import functions and set working directory

setwd("/Users/16152366/Desktop/Antarctica/Results:Stats/2023/")
source("functions1.R")

1.2 Raw eDNA data prep: contamination removal

#read in data
raweDNA<-read_excel("eDNA_CPR.xlsx", sheet = "Raw_eDNA")
samples<-read_excel("eDNA_CPR.xlsx", sheet = "Samples")
eDNAsamples<-droplevels(subset(samples, Method!="CPR"))

#assign subkingdoms
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Dinophyceae", "Myzozoa", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Chlorophyta", "Chlorophyta", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Bacillariophyta", "Bacillariophyta", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Ascomycota", "Dikarya", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Bacteroidetes", "Bacillariophyta", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Basidiomycota", "Dikarya", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Bigyra", "Harosa", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Bolidophyceae", "Harosa", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Not assigned", "Not assigned", raweDNA$Subkingdom)

#contamination removal 
contam_zotus<-read_excel("eDNA.xlsx", sheet = "zotu_remove") #zotus to completely remove
newzotus<-read_excel("eDNA.xlsx", sheet = "zotu_add") #zotus w/ number of reads present in controls removed from samples
newzotus<-newzotus %>% select(-`Amount in control`)
raweDNAcontam<-raweDNA[!raweDNA$zOTU %in% contam_zotus$zOTU,]
raweDNAcontam<-merge(raweDNAcontam, newzotus, all=TRUE)
##write.csv(raweDNAcontam, file="rawcontamremoved.csv", row.names = FALSE)

1.3 Raw eDNA analysis

#read back in after adding comments, checking distribution
raweDNAcontam<-read_excel("eDNA_CPR.xlsx", sheet="Raw_eDNA_contamremoved")

#Sequencing statistics
sum(colSums(raweDNAcontam[12:157])) #2.43 million reads

## [1] 2432820

length(unique(raweDNAcontam$zOTU)) #1273 zotus

## [1] 1273

length(unique(raweDNAcontam$zOTU[raweDNAcontam$Subkingdom=="Metazoa"])) #808 metazoan ZOTU

## [1] 808

sum(colSums(raweDNAcontam[12:157][raweDNAcontam$Subkingdom=="Metazoa",])) #1139972 metazoan reads

## [1] 1139972

sum(colSums(raweDNAcontam[12:157][raweDNAcontam$Subkingdom=="Metazoa",]))/sum(colSums(raweDNAcontam[12:157])) #46.86 metazoan

## [1] 0.4685805

length(unique(raweDNAcontam$zOTU[raweDNAcontam$Subkingdom=="Metazoa" & raweDNAcontam$Assignment=="Species"])) #619 resolved to species level

## [1] 619

#take out only my samples
mysamples<-raweDNAcontam[as.character(samples$Sample[samples$Method=="eDNA"])]
raweDNA<-cbind(raweDNAcontam[1:11], mysamples)

#make long format and merge with sample data
long_raw<-pivot_longer(raweDNA, cols = 12:141, names_to = "Sample", values_to = "Reads")
long_raw<-long_raw[long_raw$Reads!=0,]
long_raw<-merge(eDNAsamples, long_raw, all=TRUE)
long_raw<-droplevels(subset(long_raw, Type!="eDNA_Control"))

#summary per method: data prep
rawsummary<- long_raw %>% group_by(Type, Subkingdom) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
rawsummary<-rawsummary[rawsummary$Reads!=0,]
rawsummary$Metazoan<-if_else(rawsummary$Subkingdom=="Metazoa", "Metazoan", "Non-metazoan")
##write.csv(rawsummary, "subkindgomsummary.csv")

#test the correlation between sequencing depth and ZOTU richness
SeqDepth<-long_raw %>% select(Type, Sample, zOTU, Reads)
write.csv(SeqDepth, "Seqdepth.csv")
SeqDep1<-read.csv("SeqDepth1.csv")

#12 L correlation
#plot correlation between read depth and zotu richness
options(scipen = 999)
plot(SeqDep1$Reads[SeqDep1$Type=="12L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="12L"], xlab="Sequencing depth (Reads)", ylab="ZOTU richness")

cor(SeqDep1$Reads[SeqDep1$Type=="12L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="12L"], method='pearson')

## [1] -0.03284266

# -0.03 correlation
cor.test(SeqDep1$Reads[SeqDep1$Type=="12L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="12L"], method='pearson')

## 
##  Pearson's product-moment correlation
## 
## data:  SeqDep1$Reads[SeqDep1$Type == "12L"] and SeqDep1$ZOTU.Richness[SeqDep1$Type == "12L"]
## t = -0.26082, df = 63, p-value = 0.7951
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2745426  0.2127607
## sample estimates:
##         cor 
## -0.03284266

#not significant weak correlation

#2 L correlation
#plot correlation between read depth and zotu richness
plot(SeqDep1$Reads[SeqDep1$Type=="2L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="2L"], xlab="Sequencing depth (Reads)", ylab="ZOTU richness")

cor(SeqDep1$Reads[SeqDep1$Type=="2L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="2L"], method='pearson')

## [1] 0.1589604

# -0.03 correlation
cor.test(SeqDep1$Reads[SeqDep1$Type=="2L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="2L"], method='pearson')

## 
##  Pearson's product-moment correlation
## 
## data:  SeqDep1$Reads[SeqDep1$Type == "2L"] and SeqDep1$ZOTU.Richness[SeqDep1$Type == "2L"]
## t = 1.278, df = 63, p-value = 0.206
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08836466  0.38782345
## sample estimates:
##       cor 
## 0.1589604

#not significant weak correlation

#12L
#total reads
sum(rawsummary$Reads[rawsummary$Type=="12L"]) #1162458

## [1] 1162458

#total metazoan %
(sum(rawsummary$Reads[rawsummary$Type=="12L" & rawsummary$Metazoan=="Metazoan"])/sum(rawsummary$Reads[rawsummary$Type=="12L"]))*100 #85.66

## [1] 85.65565

(sum(rawsummary$Reads[rawsummary$Type=="12L" & rawsummary$Metazoan=="Non-metazoan"])/sum(rawsummary$Reads[rawsummary$Type=="12L"]))*100

## [1] 14.34435

#2L
#total reads
sum(rawsummary$Reads[rawsummary$Type=="2L"]) #1268468

## [1] 1268468

#total metazoan % 
(sum(rawsummary$Reads[rawsummary$Type=="2L" & rawsummary$Metazoan=="Metazoan"])/sum(rawsummary$Reads[rawsummary$Type=="2L"]))*100 #11.37

## [1] 11.37285

(sum(rawsummary$Reads[rawsummary$Type=="2L" & rawsummary$Metazoan=="Non-metazoan"])/sum(rawsummary$Reads[rawsummary$Type=="2L"]))*100

## [1] 88.62715

ggplot(rawsummary, aes(fill=Subkingdom, y=Reads, x=Type)) + 
  geom_bar(stat="identity",width = 0.9) + 
  scale_fill_manual(values = fill13)+
  theme(axis.line = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
        axis.ticks = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
        legend.text = element_text(size = 15), legend.title = element_text(face = "bold",size=16.5),
        plot.margin = margin(20,10,10,10))+ 
  theme(axis.text.x = element_text(colour="black",size=18,vjust = 0.5, hjust=0.5),
        axis.text.y = element_text(colour="black",size=18, vjust = 0.5, hjust=1)) + 
  scale_y_continuous(expand = c(0, 0))+
  theme(axis.title.y = element_text(size=16.5, face = "bold"),
        axis.title.x = element_text(size=16.5,vjust = -0.5, face = "bold")) +
  scale_x_discrete(labels=c("12L"="12 L", "2L"="2 L", "eDNA_Control"="Control"),expand = c(0.25, 0))+
  guides(fill=guide_legend(title="Subkindgom"))+ xlab("Sample type")+ ylab("Number of reads")

#Chlorophyta, Hacrobia and Harosa dominate 2L> what are the %s?
(sum(rawsummary$Reads[rawsummary$Type=="2L" & rawsummary$Subkingdom=="Chlorophyta"])/sum(rawsummary$Reads[rawsummary$Type=="2L"]))*100 #11.37

## [1] 47.48705

(sum(rawsummary$Reads[rawsummary$Type=="2L" & rawsummary$Subkingdom=="Hacrobia"])/sum(rawsummary$Reads[rawsummary$Type=="2L"]))*100 #11.37

## [1] 15.08812

(sum(rawsummary$Reads[rawsummary$Type=="2L" & rawsummary$Subkingdom=="Harosa"])/sum(rawsummary$Reads[rawsummary$Type=="2L"]))*100 #11.37

## [1] 19.88391

#tidy environment
rm(raweDNAcontam, contam_zotus, newzotus)

# plot metazoan vs non-meta 
long_raw$Metazoan<-if_else(long_raw$Subkingdom=="Metazoa", "Metazoan", "Non-metazoan")
long_raw$Metazoan<-if_else(long_raw$Subkingdom=="Not assigned", "Not assigned", long_raw$Metazoan)
long_raw$Metazoan<-factor(long_raw$Metazoan, levels=rev(sort(unique(long_raw$Metazoan))))
long_raw$Zone<-factor(long_raw$Zone, levels = c("STZ","SAZ","PFZ","AAZ","SACCZ"))

#Overall
ggplot(long_raw, aes(fill=Metazoan, y=Reads, x=Type)) + theme_classic()+
  geom_bar(position="fill", stat="identity",width = 0.9) + 
  scale_fill_manual(values = rev(rawfill))+
  theme(axis.line = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
        axis.ticks = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
        legend.text = element_text(size = 15), legend.title = element_text(face = "bold",size=16.5),
        plot.margin = margin(20,10,10,10))+ 
  theme(axis.text.x = element_text(colour="black",size=18,vjust = 0.5, hjust=0.5),
        axis.text.y = element_text(colour="black",size=18, vjust = 0.5, hjust=1)) + 
  theme(axis.title.y = element_text(size=16.5, face = "bold"),
        axis.title.x = element_text(size=16.5,vjust = -0.5, face = "bold")) +
  scale_y_continuous(labels=scales::percent,expand = c(0, 0))+
  scale_x_discrete(labels=c("12L"="LargeVF", "2L"="SmallVF"),expand = c(0.25, 0))+
  guides(fill=guide_legend(title="Subkingdom"))+ xlab("Sample type")+ ylab("Number of reads (%)")

ggplot(long_raw, aes(fill=Metazoan, y=Reads, x=Type)) + theme_classic()+
  geom_bar(stat="identity",width = 0.9) + 
  scale_fill_manual(values = rev(rawfill))+
  theme(axis.line = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
        axis.ticks = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
        legend.text = element_text(size = 15), legend.title = element_text(face = "bold",size=16.5),
        plot.margin = margin(20,10,10,10))+ 
  theme(axis.text.x = element_text(colour="black",size=18,vjust = 0.5, hjust=0.5),
        axis.text.y = element_text(colour="black",size=18, vjust = 0.5, hjust=1)) + 
  theme(axis.title.y = element_text(size=16.5, face = "bold"),
        axis.title.x = element_text(size=16.5,vjust = -0.5, face = "bold")) +
  scale_y_continuous(expand = c(0, 0))+
  scale_x_discrete(labels=c("12L"="LargeVF", "2L"="SmallVF"),expand = c(0.25, 0))+
  guides(fill=guide_legend(title="Subkingdom"))+ xlab("Sample type")+ ylab("Number of reads")

#per zone
ggplot(data=long_raw, aes(y=Reads, x=Type, fill=Metazoan))+
  geom_bar(position="fill",stat="identity",width = 0.98) +
  scale_fill_manual(values = MetazoanFill, labels=c("Not assigned"="Not assigned", "Non-metazoan"="Non-Animalia", "Metazoan"="Animalia")) + 
  theme(axis.line = element_line(colour = "black", linewidth = 0.3, linetype = "solid"), 
        axis.ticks = element_line(colour = "black", linewidth = 0.3, linetype = "solid"), 
        strip.text.x = element_text(size=16, face="bold"), 
        axis.text.x = element_text(colour="black",size=14,vjust = 0.5, hjust=0.5),
        axis.text.y = element_text(colour="black",size=18, vjust = 0.5, hjust=1),
        axis.title.x = element_text(vjust = -3, size=16, face="bold"), 
        axis.title.y = element_text(size=16, face="bold"), 
        plot.margin = margin(10,5,20,20), legend.text = element_text(size = 12), 
        legend.title = element_text(face = "bold", size=16))+
  facet_grid(~Zone, scales = "free_x", space = "free_x")+ 
    scale_x_discrete(labels=c("12L"="LargeVF", "2L"="SmallVF"),expand = c(0.25, 0))+
  #scale_fill_discrete(labels=c("Not assigned"="Not assigned", "Non-metazoan"="Non-Animalia", "Metazoan"="Animalia"))+
  scale_y_continuous(labels=c("0"="0", "0.25"="25", "0.50"="50","0.75"="75", "1.00"="100"),expand = c(0, 0))+
  xlab("Volume")+ ylab("Relative read abundance (%)")

#ggsave(filename = "Meta_nonmeta_zone.png", plot = last_plot(),  width = 40, height = 16, units = "cm", dpi = 600)
?ggsave

#reads assigned to 12L (%)
sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$Metazoan=="Metazoan"])/sum(long_raw$Reads[long_raw$Type=="12L"])

## [1] 0.8565565

#reads assigned to 2L (%)
sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$Metazoan=="Metazoan"])/sum(long_raw$Reads[long_raw$Type=="2L"])

## [1] 0.1137285

#particularly low in SAZ, read %?
#12L
sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$Zone=="SAZ"]) #239846

## [1] 239846

sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$Zone=="SAZ" & long_raw$Metazoan=="Metazoan"]) #204424

## [1] 204424

sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$Zone=="SAZ" & long_raw$Metazoan=="Metazoan"])/sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$Zone=="SAZ"])

## [1] 0.8523136

#2L
sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$Zone=="SAZ"]) #529515

## [1] 529515

sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$Zone=="SAZ" & long_raw$Metazoan=="Metazoan"])/sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$Zone=="SAZ"]) #4383 (0.008%) metazoan

## [1] 0.008277386

1.4 Metazoan data

#filter data to be metazoan only
eDNAdata<-droplevels(subset(long_raw, Subkingdom=="Metazoa", Assignment!="Exclude"))
eDNAdata_wide<-droplevels(subset(raweDNA, Subkingdom=="Metazoa", Assignment!="Exclude"))

sum(eDNAdata$Reads)

## [1] 1139972

#1139972 total metazoan reads, these were assigned to:
sum(eDNAdata$Reads[eDNAdata$Assignment=="Species"]) #1119052 98.16%

## [1] 1119052

sum(eDNAdata$Reads[eDNAdata$Assignment=="Genus"]) #4687

## [1] 4635

sum(eDNAdata$Reads[eDNAdata$Assignment=="Family"]) #15584

## [1] 15584

sum(eDNAdata$Reads[eDNAdata$Assignment=="Order"]) #429

## [1] 429

sum(eDNAdata$Reads[eDNAdata$Assignment=="Class"]) #16

## [1] 16

sum(eDNAdata$Reads[eDNAdata$Assignment=="Phyla"]) #256

## [1] 256

#species assignments
species<-droplevels(subset(eDNAdata_wide, Assignment=="Species"))
species_long<-droplevels(subset(eDNAdata, Assignment=="Species"))
length(unique(species$Species)) #68 species detected

## [1] 68

length(unique(species$Genus)) #from 54 genera

## [1] 54

length(unique(species$Family)) #and 43 families

## [1] 43

length(unique(species$Order)) #and 24  orders

## [1] 24

1.5 t-test between volumes

#test normality
ttest<-eDNAdata %>% select(Sample, Type, Reads) %>% group_by(Sample, Type) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
tapply(ttest$Reads,ttest$Type,shapiro.test) #not normal

## $`12L`
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.71037, p-value = 0.000000000504
## 
## 
## $`2L`
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.53665, p-value = 0.000000000000518

#conduct wilcoxon ttest
wilcox.test(ttest$Reads~ttest$Type, mu=0, alt="two.sided", 
       conf.level=0.95, paired=TRUE) #V = 2077, p-value = 9.8e-12

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  ttest$Reads by ttest$Type
## V = 2077, p-value = 0.00000000005342
## alternative hypothesis: true location shift is not equal to 0

tapply(ttest$Reads, ttest$Type, median) #11376 for 12, 696 for 2

##   12L    2L 
## 11376   696

#metazoan reads/composition per sample
tapply(ttest$Reads, ttest$Type, mean)

##      12L       2L 
## 15318.63  2219.40

#      12L       2L 
# 15318.63  2219.40 

#overall reads
overall<-long_raw %>% select(Sample, Type, Reads) %>% group_by(Sample, Type) %>% summarise_if(is.numeric, sum, na.rm=TRUE) 
tapply(overall$Reads, overall$Type, mean)

##      12L       2L 
## 17883.97 19514.89

rm(overall, ttest)

#per zone differences
eDNAdata$Type<-as.factor(eDNAdata$Type)
eDNAdata$Zone<-as.factor(eDNAdata$Zone)
wilcoxdat<-eDNAdata %>% select(Sample, Zone, Type, Reads) %>% group_by(Sample, Zone, Type) %>% summarise_if(is.numeric, sum, na.rm=TRUE)

wilcoxdat %>% group_by(Zone) %>% wilcox_test(Reads~Type, paired=TRUE)

## # A tibble: 5 × 8
##   Zone  .y.   group1 group2    n1    n2 statistic         p
## * <fct> <chr> <chr>  <chr>  <int> <int>     <dbl>     <dbl>
## 1 STZ   Reads 12L    2L         6     6        21 0.0313   
## 2 SAZ   Reads 12L    2L        17    17       153 0.0000153
## 3 PFZ   Reads 12L    2L        12    12        78 0.000488 
## 4 AAZ   Reads 12L    2L        24    24       272 0.000175 
## 5 SACCZ Reads 12L    2L         6     6        21 0.0313

1.6 Venn diagram

#make a list of the species found in 12L and 2L samples
Venn12<-subset(species_long, Type=="12L")
Venn2<-subset(species_long, Type=="2L")
#create venn plot
venn_plot <- venn.diagram(list('12L eDNA' = unique(Venn12$Species), '2L eDNA' = unique(Venn2$Species)),
                          fill = alpha(vennfill,0.95), lty = 'blank',fontfamily ="sans", 
                          fontface="bold",cex = 6,cat.dist = c(0.05, 0.05), cat.cex = 1,cat.fontface = "bold", 
                          cat.default.pos = "outer", cat.col=vennfill,rotation.degree=300,cat.pos = c(310,130),
                          cat.fontfamily="sans",scaled=FALSE,filename = NULL,disable.logging = TRUE); 
grid.newpage(); grid.draw(venn_plot)

#proportional
plot(euler(c("LargeVF" = 14, "SmallVF"= 2, "LargeVF&SmallVF"= 51), shape = "ellipse"), 
     fill = vennfill, alpha = 0.6, legend = TRUE, quantities = list(type = c("counts"), font=3, round=2, cex=0.8), labels=list(font=2, cex=1), c("#332288", "#44AA99"), alpha = 0.3, edges=list(lty = 0))

#will have to fix up in photoshop

1.7 Barplot of exclusive species per volume

#establish exclusive species and shared per volume
Exclusive12<-Venn12[!Venn12$Species %in% Venn2$Species,] #exclusive 12
Exclusive12<-Exclusive12 %>% select(Phylum, Species, Reads)
Exclusive12$ID<-"12 L exclusive"
Exclusive12<- Exclusive12 %>% group_by(Phylum, Species, ID) %>% summarise_if(is.numeric, sum, na.rm=TRUE)

Exclusive2<-Venn2[!Venn2$Species %in% Venn12$Species,] #exclusive 2
Exclusive2<-Exclusive2 %>% select(Phylum, Species, Reads)
Exclusive2$ID<-"2 L exclusive"
Exclusive2<- Exclusive2 %>% group_by(Phylum, Species, ID) %>% summarise_if(is.numeric, sum, na.rm=TRUE)

Shared<-Venn12[Venn12$Species %in% Venn2$Species,] #exclusiveshared
Shared<-Shared %>% select(Phylum, Species, Reads)
Shared$ID<-"Shared"
Shared<- Shared %>% group_by(Phylum, Species, ID) %>% summarise_if(is.numeric, sum, na.rm=TRUE)

#merge the together 
exc_sha<-merge(Exclusive12, Exclusive2, all=TRUE) %>% merge(Shared, all=TRUE)
#make PA data
exc_sha$PA<-1

#barplot: no. species
#plot1<-
  ggplot(exc_sha, aes(fill=Phylum, y=PA, x=ID)) + 
  geom_bar(stat="identity", width = 0.95) + scale_fill_manual(values = fill12) + 
  theme(axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
        axis.ticks = element_line(colour = "black", linewidth = 0.4, linetype = "solid"), 
        legend.text = element_text(size = 10),legend.background = element_blank(),
       # legend.box.background = element_rect(colour = "black"),legend.position = c(0.89,0.92),
        legend.title = element_text(face="bold", size=13), legend.key.height = unit(0.3, 'cm'),
        legend.key.width = unit(0.4, 'cm'),plot.margin = margin(50,80,20,20))+
  theme(axis.text.x = element_text(colour="black",size=14,vjust = -1),
        axis.text.y = element_text(colour="black",size=16, vjust = -0.5, hjust=1),
        axis.title.y = element_text(face="bold",size=16, vjust = 3),
        axis.title.x = element_text(face="bold",size=16,vjust = -2.5)) + 
  scale_y_continuous(expand = c(0, 0))+ scale_x_discrete(expand = c(0.05,0.05), labels=c("12 L exclusive"= "LargeVF", "2 L exclusive"= "SmallVF-eDNA"))+
  guides(fill=guide_legend(title="Phylum", byrow=TRUE))+ xlab("Detection")+ ylab("Number of species")

#cvdPlot(plot1) #check colour blind appropriate>looks good
#ggsave(filename = "eDNA_excshare.jpeg", plot = last_plot(),  width = 20, height = 14, units = "cm", dpi = 600)

#no. species as percentage
ggplot(exc_sha, aes(fill=Phylum, y=PA, x=ID)) + 
  geom_bar(position = "fill", stat="identity", width = 0.95) + scale_fill_manual(values = fill12) + 
  theme(axis.line = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
        axis.ticks = element_line(colour = "black", linewidth = 0.2, linetype = "solid"), 
        legend.text = element_text(size = 16), legend.spacing = unit(0.1,"mm"),legend.background = element_blank(),
       # legend.box.background = element_rect(colour = "black"),legend.position = c(0.89,0.92),
        legend.title = element_text(face="bold", size=20), plot.margin = margin(50,80,20,20))+
  theme(axis.text.x = element_text(colour="black",size=18,vjust = -1),
        axis.text.y = element_text(colour="black",size=22, vjust = 0.5, hjust=1),
        axis.title.y = element_text(face="bold",size=22, vjust = 3),
        axis.title.x = element_text(face="bold",size=22,vjust = -3)) + 
    scale_y_continuous(labels=c("0"="0", "0.25"="25", "0.50"="50","0.75"="75", "1.00"="100"),expand = c(0, 0))+
  scale_x_discrete(expand = c(0.05,0.05))+
  guides(fill=guide_legend(title="Phylum", byrow=TRUE))+ xlab("Detection")+ ylab("Unique species composition (%)")

#reads assigned to LargeVF samples
sum(exc_sha$Reads[exc_sha$ID=="12 L exclusive"])/sum(eDNAdata$Reads[eDNAdata$Type=="12L" & eDNAdata$Assignment=="Species"])

## [1] 0.0007965421

#tidy environment
rm(Exclusive12, Exclusive2, Shared)

#zotus assigned to species per volume
length(unique(eDNAdata$zOTU[eDNAdata$Type=="12L" & eDNAdata$Assignment=="Species"]))/length(unique(eDNAdata$zOTU[eDNAdata$Type=="12L"])) #78.07

## [1] 0.7806452

length(unique(eDNAdata$zOTU[eDNAdata$Type=="2L" & eDNAdata$Assignment=="Species"]))/length(unique(eDNAdata$zOTU[eDNAdata$Type=="2L"])) #76.95

## [1] 0.7695167

1.8 Richness comparisons

#select data needed
Richness<-species_long %>% select(Type, Sample, Species, Reads)
Richness<-Richness %>% group_by(Type, Sample, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
Richness$PA<-1
Richness<-Richness %>% select(Type, Sample, PA) %>% group_by(Type, Sample) %>% summarise_if(is.numeric, sum, na.rm=TRUE)

#t test
tapply(Richness$PA, Richness$Type, hist) #histogram looks pretty good no big skew

## $`12L`
## $breaks
##  [1]  2  4  6  8 10 12 14 16 18 20 22 24
## 
## $counts
##  [1]  2  5 15 16 13  8  3  2  0  0  1
## 
## $density
##  [1] 0.015384615 0.038461538 0.115384615 0.123076923 0.100000000 0.061538462
##  [7] 0.023076923 0.015384615 0.000000000 0.000000000 0.007692308
## 
## $mids
##  [1]  3  5  7  9 11 13 15 17 19 21 23
## 
## $xname
## [1] "X[[i]]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## 
## $`2L`
## $breaks
## [1]  2  4  6  8 10 12 14 16 18
## 
## $counts
## [1] 10 13 12 10 11  6  2  1
## 
## $density
## [1] 0.076923077 0.100000000 0.092307692 0.076923077 0.084615385 0.046153846
## [7] 0.015384615 0.007692308
## 
## $mids
## [1]  3  5  7  9 11 13 15 17
## 
## $xname
## [1] "X[[i]]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

tapply(Richness$PA, Richness$Type, shapiro.test) #not completely normal but histogram looks ok

## $`12L`
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.93593, p-value = 0.002246
## 
## 
## $`2L`
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.97003, p-value = 0.1159

t.test(Richness$PA~Richness$Type, alternative = "two.sided", conf=0.95, paired = T) #t-test

## 
##  Paired t-test
## 
## data:  Richness$PA by Richness$Type
## t = 4.8254, df = 64, p-value = 0.000009024
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.9556194 2.3059190
## sample estimates:
## mean difference 
##        1.630769

#mean and standard error
tapply(Richness$PA, Richness$Type, mean)

##      12L       2L 
## 10.03077  8.40000

standard_error <- function(x) sd(x) / sqrt(length(x)) #standard error function for mean
standard_error(Richness$PA[Richness$Type=="12L"]) #0.43

## [1] 0.4412445

standard_error(Richness$PA[Richness$Type=="2L"]) #0.43

## [1] 0.4367714

1.9 Detection frequency: species assignments

SpeciesFreq<-species_long %>% select(Type, Sample, Phylum, Class, Order, Family, Genus, Species, Reads) #select data
SpeciesFreq<-SpeciesFreq %>% group_by(Type, Sample, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE) #summarise
SpeciesFreq$FOO<-1 #add frequency of occurence column
SpeciesFreq<-SpeciesFreq %>% group_by(Type, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE) #summarise without sample to add all the species FOO together
SpeciesFreq

## # A tibble: 119 × 9
## # Groups:   Type, Phylum, Class, Order, Family, Genus [96]
##    Type  Phylum     Class        Order        Family  Genus Species  Reads   FOO
##    <chr> <chr>      <chr>        <chr>        <chr>   <chr> <chr>    <dbl> <dbl>
##  1 12L   Annelida   Polychaeta   Phyllodocida Polyno… Harm… Harmot…  14054     1
##  2 12L   Annelida   Polychaeta   Phyllodocida Polyno… Poly… Polyeu…  25286     2
##  3 12L   Arthropoda Branchiopoda Diplostraca  Podoni… Evad… Evadne…    291     4
##  4 12L   Arthropoda Branchiopoda Diplostraca  Podoni… Podon Podon …     44     2
##  5 12L   Arthropoda Copepoda     Calanoida    Acarti… Para… Parala…    255     1
##  6 12L   Arthropoda Copepoda     Calanoida    Calani… Cala… Calano…   2720    35
##  7 12L   Arthropoda Copepoda     Calanoida    Calani… Cala… Calanu…   2226     3
##  8 12L   Arthropoda Copepoda     Calanoida    Calani… Cala… Calanu…  25349    30
##  9 12L   Arthropoda Copepoda     Calanoida    Calani… Cala… Calanu… 232205    39
## 10 12L   Arthropoda Copepoda     Calanoida    Calani… Cten… Ctenoc… 322253    52
## # ℹ 109 more rows

sum(SpeciesFreq$Reads[SpeciesFreq$Type=="2L" & SpeciesFreq$Species=="Ctenocalanus citer"])/sum(SpeciesFreq$Reads[SpeciesFreq$Type=="2L"])

## [1] 0.4241434

sum(SpeciesFreq$Reads[SpeciesFreq$Type=="12L" & SpeciesFreq$Species=="Ctenocalanus citer"])/sum(SpeciesFreq$Reads[SpeciesFreq$Type=="12L"])

## [1] 0.3286659

#plot FOO and reads in grid form
species_freqplot<-
  plot_grid(nrow = 1, ncol = 3, labels = c("A", "B", "C"),label_size = 25,
            ggplot(SpeciesFreq, aes(fill=Phylum, y=FOO, x=Type)) + 
              geom_bar(stat="identity", width = 0.95) + scale_fill_manual(values = fill12) + 
              theme(axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
                    axis.ticks = element_line(colour = "black", linewidth = 0.4, linetype = "solid"), 
                    legend.position="none", plot.margin = margin(50,80,20,20))+
              theme(axis.text.x = element_text(colour="black",size=14,vjust = -1),
                    axis.text.y = element_text(colour="black",size=16, vjust = -0.5, hjust=1),
                    axis.title.y = element_text(face="bold",size=16, vjust = 3),
                    axis.title.x = element_text(face="bold",size=16,vjust = -2.5)) + 
              scale_y_continuous(expand = c(0, 0))+ scale_x_discrete(expand = c(0.05,0.05), labels=c("12L"= "LargeVF", "2L"= "SmallVF"))+
              guides(fill=guide_legend(title="Phylum", byrow=TRUE))+ xlab("Detection")+ ylab("Species detection frequency (FOO)")
                   ,
            ggplot(SpeciesFreq, aes(fill=Phylum, y=Reads, x=Type)) + 
              geom_bar(stat="identity", width = 0.95) + scale_fill_manual(values = fill12) + 
              theme(axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
                    axis.ticks = element_line(colour = "black", linewidth = 0.4, linetype = "solid"), 
                    legend.position="none", plot.margin = margin(50,80,20,20))+
              theme(axis.text.x = element_text(colour="black",size=14,vjust = -1),
                    axis.text.y = element_text(colour="black",size=16, vjust = -0.5, hjust=1),
                    axis.title.y = element_text(face="bold",size=16, vjust = 3),
                    axis.title.x = element_text(face="bold",size=16,vjust = -2.5)) + 
              scale_y_continuous(expand = c(0, 0))+ scale_x_discrete(expand = c(0.05,0.05), labels=c("12L"= "LargeVF", "2L"= "SmallVF"))+
              guides(fill=guide_legend(title="Phylum", byrow=TRUE))+ xlab("Detection")+ ylab("Reads")
            ,
            ggplot(SpeciesFreq, aes(fill=Phylum, y=Reads, x=Type)) + 
              geom_bar(position = "fill", stat="identity", width = 0.95) + scale_fill_manual(values = fill12) + 
              theme(axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
                    axis.ticks = element_line(colour = "black", linewidth = 0.4, linetype = "solid"), 
                    legend.position="none", plot.margin = margin(50,80,20,20))+
              theme(axis.text.x = element_text(colour="black",size=14,vjust = -1),
                    axis.text.y = element_text(colour="black",size=16, vjust = -0.5, hjust=1),
                    axis.title.y = element_text(face="bold",size=16, vjust = 3),
                    axis.title.x = element_text(face="bold",size=16,vjust = -2.5)) + 
              scale_y_continuous(labels=c("0"="0", "0.25"="25", "0.50"="50","0.75"="75", "1.00"="100"), expand = c(0, 0))+ scale_x_discrete(expand = c(0.05,0.05), labels=c("12L"= "LargeVF", "2L"= "SmallVF"))+
              guides(fill=guide_legend(title="Phylum", byrow=TRUE))+ xlab("Detection")+ ylab("Read abundance (%)"))

#take the legend from one plot (code above just pasted here, need legend for plot_grid)
legend<-ggplot(SpeciesFreq, aes(fill=Phylum, y=Reads, x=Type)) + 
  geom_bar(position = "fill", stat="identity", width = 0.95) + scale_fill_manual(values = fill12) 
legend<-get_legend(legend) 

#plot heatmap and legend together
plot_grid(species_freqplot, legend, rel_widths = c(3, .3))

rm(legend)

#export data
##write.csv(SpeciesFreq, "SpeciesFrequency.csv")

1.10 Diversity across zones

#select data and convert abundance to PA
diversity<-species_long %>% select(-Comment, -PercentID, -zOTU, -Metazoan) %>% group_by(Type, Zone, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
diversity$FOO[diversity$Reads>0]<-1

#order zones
diversity$Zone<-factor(diversity$Zone, levels = c("STZ", "SAZ", "PFZ", "AAZ", "SACCZ"))

ggplot(data=diversity, aes(fill=Phylum, y=FOO, x=Type)) +
  geom_bar(stat="identity", width = 0.97) + 
  scale_fill_manual(values = fill12) +
  theme(axis.line = element_line(colour = "black", size = 0.4, linetype = "solid"),
        axis.ticks = element_line(colour = "black", size = 0.4, linetype = "solid"), 
        axis.text.x = element_text(size=11,vjust = 0.5, hjust=0.5, colour="black"),
        axis.text.y = element_text(size=16, vjust = 0.5, hjust=1,colour="black"),
        axis.title.y = element_text(face="bold", size=15, vjust=3),
        strip.text.x = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(face="bold", size=13,vjust = -2),
        legend.text = element_text(size = 10), legend.title = element_text(size=13, face="bold"),
        legend.key.height = unit(0.2, 'cm'), legend.key.width = unit(0.3, 'cm'),
        plot.margin =  margin(20,20,30,20)) + scale_y_continuous(expand = c(0, 0))+ 
  facet_grid(~Zone, switch = "x", scales = "free_x", space = "free_x")+ 
  theme(panel.spacing = unit(0.65, "cm"))+
  scale_x_discrete(labels=stringr::str_wrap(c("12L"="LargeVF", "2L"="SmallVF"),width=5), expand = c(0, 0))+
  guides(fill=guide_legend(byrow = TRUE, title="Phylum"))+
  labs(x="Detection methods across Southern Ocean zones", y="Species diversity")

#ggsave(filename = "CPR_eDNA_diversityzones.jpeg", plot = last_plot(),  width = 24, height = 14, units = "cm", dpi = 600)

#as table
with(species_long, table(Phylum, Zone, Type))

## , , Type = 12L
## 
##                Zone
## Phylum           STZ  SAZ  PFZ  AAZ SACCZ
##   Annelida         0    0    0    0     8
##   Arthropoda     483 1450  298  665    53
##   Bryozoa         12    9    3   20     4
##   Chaetognatha     0    3    1    3     0
##   Chordata         2   10    3    6     0
##   Cnidaria        39   27   11   50     6
##   Echinodermata    7    4    0    4     3
##   Mollusca         4    3   12   23     0
##   Nemertea         0    0    0    0     0
##   Porifera         0    0    0    0     0
## 
## , , Type = 2L
## 
##                Zone
## Phylum           STZ  SAZ  PFZ  AAZ SACCZ
##   Annelida         0    0    0    0     4
##   Arthropoda      61  146  139  398    24
##   Bryozoa         16   23    3   29    11
##   Chaetognatha     0    1    2    3     0
##   Chordata         1    1    0    2     0
##   Cnidaria        35   29   20   98    17
##   Echinodermata    6    0    0    1     1
##   Mollusca         2    2    8   25     1
##   Nemertea         0    0    0    0     1
##   Porifera         2    0    0    1     0

1.11 Hills numbers

1.11.1 Plot

#merge species assignments by species (i.e unique)
speciesmerged<-species %>% select(-zOTU, -Comment, -PercentID, -Assignment)
speciesmerged<-speciesmerged %>% group_by(Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
#12L only
species12<-cbind(speciesmerged[1:6], speciesmerged[as.character(samples$Sample[samples$Type=="12L"])])
species12<-species12[rowSums(species12[,7:71])!=0,]
#2L only
species2<-cbind(speciesmerged[1:6], speciesmerged[as.character(samples$Sample[samples$Type=="2L"])])
species2<-species2[rowSums(species2[,7:71])!=0,]

#eDNA 12L hills data
hill_0 <- hill_div(species12[7:71], qvalue = 0)
hill_1 <- hill_div(species12[7:71], qvalue = 1)
hill_2 <- hill_div(species12[7:71], qvalue = 2)
hill12 <- cbind("Sample"=names(hill_0), "Type"="12L", hill_0, hill_1, hill_2) #keep names to check the samples are correct
hill12<-merge(samples[samples$Type=="12L",], hill12, all=TRUE)
hill12 <- hill12 %>% mutate_at(c('hill_0', 'hill_1', 'hill_2'), as.numeric)

#2L hills data
hill_0 <- hill_div(species2[7:71], qvalue = 0)
hill_1 <- hill_div(species2[7:71], qvalue = 1)
hill_2 <- hill_div(species2[7:71], qvalue = 2)
hill2 <- cbind("Sample"=names(hill_0),"Type"="2L", hill_0, hill_1, hill_2) #keep names to check the samples are correct
hill2<-merge(samples[samples$Type=="2L",], hill2, all=TRUE)
hill2 <- hill2 %>% mutate_at(c('hill_0', 'hill_1', 'hill_2'), as.numeric)

#merge together
hills<-merge(hill12, hill2, all=TRUE)
hills$Zone<-factor(hills$Zone, levels=c("STZ", "SAZ", "PFZ","AAZ", "SACCZ"))

#seperate plots for each hills number
plothill_0 <- violin.plot(hills, ydata = hills$hill_0, col = vennfill, fill = vennfill, ylab = "Hills number (q = 0)", ylow = 0, yhigh = 26)+ theme_light()+
  theme(plot.margin = unit(c(1,1,1,1), "cm"), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
  theme(axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")
 
plothill_1 <- violin.plot(hills, ydata = hills$hill_1, col = vennfill, fill = vennfill, ylab = "Hills number (q = 1)", ylow = 0, yhigh = 6)+ theme_light()+
  theme(plot.margin = unit(c(1,1,1,1), "cm"), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
  theme(axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")

plothill_2 <- violin.plot(hills, ydata = hills$hill_2, col = vennfill, fill = vennfill, ylab = "Hills number (q = 2)", ylow = 0, yhigh = 6)+ theme_light()+
  theme(plot.margin = unit(c(1,1,1,1), "cm"), axis.text.x = element_blank(), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
  theme(axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")

#combine into one plot and...plot
hillsplot<-plot_grid(plothill_0, plothill_1, plothill_2, ncol=1, labels = c("A", "B", "C"), label_size = 25)
plot(hillsplot)

#tidy env
rm(hill_0, hill_1, hill_2)

1.11.2 Summary stats & t-tests

#select hills numbers
hillsummstat<-hills %>% select(Type, Zone, hill_0, hill_1, hill_2)
#summary stats function
hillsummstat<-summaryBy(hillsummstat[,3:5]~Zone & Type,data=hillsummstat,FUN=function(x) 
  + {c(mean=mean(x),sum=sum(x),se=sd(x)/sqrt(length(x)))})
#pivot table longer
hillsummstat<-hillsummstat %>% pivot_longer(!c(Zone, Type), names_to = c("hill",".value"),  names_pattern = "(\\d*).([A-Za-z]+)", names_prefix = "hill_")
print(hillsummstat, n=30)

## # A tibble: 30 × 6
##    Zone  Type  hill   mean    sum    se
##    <fct> <chr> <chr> <dbl>  <dbl> <dbl>
##  1 STZ   12L   0     12.7   76    2.79 
##  2 STZ   12L   1      2.51  15.1  0.600
##  3 STZ   12L   2      1.89  11.3  0.371
##  4 STZ   2L    0      8.17  49    2.36 
##  5 STZ   2L    1      2.62  15.7  0.665
##  6 STZ   2L    2      1.89  11.4  0.378
##  7 SAZ   12L   0      8    136    0.569
##  8 SAZ   12L   1      2.46  41.8  0.294
##  9 SAZ   12L   2      2.01  34.2  0.219
## 10 SAZ   2L    0      5.76  98    0.511
## 11 SAZ   2L    1      2.95  50.2  0.347
## 12 SAZ   2L    2      2.34  39.8  0.278
## 13 PFZ   12L   0      9.92 119    0.434
## 14 PFZ   12L   1      2.81  33.7  0.216
## 15 PFZ   12L   2      2.26  27.1  0.177
## 16 PFZ   2L    0      7.83  94    0.458
## 17 PFZ   2L    1      2.65  31.8  0.314
## 18 PFZ   2L    2      2.09  25.1  0.279
## 19 AAZ   12L   0     11.9  285    0.539
## 20 AAZ   12L   1      2.81  67.4  0.221
## 21 AAZ   12L   2      2.05  49.2  0.164
## 22 AAZ   2L    0     11    264    0.571
## 23 AAZ   2L    1      2.87  68.9  0.252
## 24 AAZ   2L    2      2.20  52.8  0.202
## 25 SACCZ 12L   0      6     36    0.632
## 26 SACCZ 12L   1      1.43   8.58 0.171
## 27 SACCZ 12L   2      1.25   7.50 0.139
## 28 SACCZ 2L    0      6.83  41    1.28 
## 29 SACCZ 2L    1      2.54  15.2  0.325
## 30 SACCZ 2L    2      2.05  12.3  0.286

1.11.3 Richness (q=0)

#test for normality
#2L
tapply(hill2$hill_0, hill2$Zone, shapiro.test) #not normal in SACCZ

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.92594, p-value = 0.07914
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.936, p-value = 0.4481
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.73312, p-value = 0.01355
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.97841, p-value = 0.9413
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.7975, p-value = 0.0558

tapply(hill2$hill_0[hill2$Zone=="SACCZ"], hill2$Zone[hill2$Zone=="SACCZ"], hist) #hist bimodal so will not become normal

## $SACCZ
## $breaks
## [1]  4  6  8 10
## 
## $counts
## [1] 3 0 3
## 
## $density
## [1] 0.25 0.00 0.25
## 
## $mids
## [1] 5 7 9
## 
## $xname
## [1] "X[[i]]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

#12L
tapply(hill12$hill_0, hill12$Zone, shapiro.test) #Not normal in saccz

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.9799, p-value = 0.894
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.90053, p-value = 0.1611
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.72072, p-value = 0.01014
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.9633, p-value = 0.694
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.8616, p-value = 0.1947

tapply(hill12$hill_0[hill12$Zone=="SACCZ"], hill12$Zone[hill12$Zone=="SACCZ"], hist) #skew but will do wilcox rather than transform and interpret

## $SACCZ
## $breaks
## [1] 3 4 5 6 7
## 
## $counts
## [1] 1 0 2 3
## 
## $density
## [1] 0.1666667 0.0000000 0.3333333 0.5000000
## 
## $mids
## [1] 3.5 4.5 5.5 6.5
## 
## $xname
## [1] "X[[i]]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

hills %>% group_by(Zone) %>% t_test(hill_0~Type, paired=TRUE)  #default conf 0.95 and two sided

## # A tibble: 5 × 9
##   Zone  .y.    group1 group2    n1    n2 statistic    df         p
## * <fct> <chr>  <chr>  <chr>  <int> <int>     <dbl> <dbl>     <dbl>
## 1 STZ   hill_0 12L    2L         6     6     3.25      5 0.0227   
## 2 SAZ   hill_0 12L    2L        17    17     3.41     16 0.0036   
## 3 PFZ   hill_0 12L    2L        12    12     6.20     11 0.0000674
## 4 AAZ   hill_0 12L    2L        24    24     1.68     23 0.107    
## 5 SACCZ hill_0 12L    2L         6     6    -0.881     5 0.419

hills %>% group_by(Zone) %>% wilcox_test(hill_0~Type, paired=TRUE)

## # A tibble: 5 × 8
##   Zone  .y.    group1 group2    n1    n2 statistic       p
## * <fct> <chr>  <chr>  <chr>  <int> <int>     <dbl>   <dbl>
## 1 STZ   hill_0 12L    2L         6     6        21 0.0355 
## 2 SAZ   hill_0 12L    2L        17    17        94 0.00932
## 3 PFZ   hill_0 12L    2L        12    12        66 0.00355
## 4 AAZ   hill_0 12L    2L        24    24       115 0.203  
## 5 SACCZ hill_0 12L    2L         6     6         6 0.395

#significantly differs in the PFZ, SAZ, and STZ

1.11.4 Shannons diversity (q=1)

#test for normality
#2L
tapply(hill2$hill_1, hill2$Zone, shapiro.test) #not normal in STZ or PFZ

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.95572, p-value = 0.3586
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.77634, p-value = 0.005106
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.89128, p-value = 0.325
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.92127, p-value = 0.1552
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.77093, p-value = 0.03168

tapply(hill2$hill_1[hill2$Zone=="STZ"], hill2$Zone[hill2$Zone=="STZ"], hist) #bimodal so will not become normal

## $STZ
## $breaks
## [1] 1 2 3 4 5 6
## 
## $counts
## [1] 4 0 0 1 1
## 
## $density
## [1] 0.6666667 0.0000000 0.0000000 0.1666667 0.1666667
## 
## $mids
## [1] 1.5 2.5 3.5 4.5 5.5
## 
## $xname
## [1] "X[[i]]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

tapply(hill2$hill_1[hill2$Zone=="PFZ"], hill2$Zone[hill2$Zone=="PFZ"], hist) #also fairly blocked

## $PFZ
## $breaks
## [1] 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
## 
## $counts
## [1] 3 4 3 0 0 0 2
## 
## $density
## [1] 0.5000000 0.6666667 0.5000000 0.0000000 0.0000000 0.0000000 0.3333333
## 
## $mids
## [1] 1.75 2.25 2.75 3.25 3.75 4.25 4.75
## 
## $xname
## [1] "X[[i]]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

#12L
tapply(hill12$hill_1, hill12$Zone, shapiro.test) #not normal in AAZ

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.90625, p-value = 0.02927
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.95267, p-value = 0.6762
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.87997, p-value = 0.2689
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.89411, p-value = 0.05419
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.87167, p-value = 0.233

tapply(hill12$hill_1[hill12$Zone=="AAZ"], hill12$Zone[hill12$Zone=="AAZ"], hist) #will conduct wilcox for not normal zones rather than transform and interpret

## $AAZ
## $breaks
## [1] 1 2 3 4 5 6
## 
## $counts
## [1]  3 14  3  2  2
## 
## $density
## [1] 0.12500000 0.58333333 0.12500000 0.08333333 0.08333333
## 
## $mids
## [1] 1.5 2.5 3.5 4.5 5.5
## 
## $xname
## [1] "X[[i]]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

hills %>% group_by(Zone) %>% t_test(hill_1~Type, paired=TRUE)  #default conf 0.95 and two sided

## # A tibble: 5 × 9
##   Zone  .y.    group1 group2    n1    n2 statistic    df      p
## * <fct> <chr>  <chr>  <chr>  <int> <int>     <dbl> <dbl>  <dbl>
## 1 STZ   hill_1 12L    2L         6     6    -0.718     5 0.505 
## 2 SAZ   hill_1 12L    2L        17    17    -1.57     16 0.137 
## 3 PFZ   hill_1 12L    2L        12    12     0.445    11 0.665 
## 4 AAZ   hill_1 12L    2L        24    24    -0.195    23 0.847 
## 5 SACCZ hill_1 12L    2L         6     6    -2.33      5 0.0676

hills %>% group_by(Zone) %>% wilcox_test(hill_1~Type, paired=TRUE)

## # A tibble: 5 × 8
##   Zone  .y.    group1 group2    n1    n2 statistic      p
## * <fct> <chr>  <chr>  <chr>  <int> <int>     <dbl>  <dbl>
## 1 STZ   hill_1 12L    2L         6     6         6 0.438 
## 2 SAZ   hill_1 12L    2L        17    17        40 0.0887
## 3 PFZ   hill_1 12L    2L        12    12        45 0.677 
## 4 AAZ   hill_1 12L    2L        24    24       154 0.922 
## 5 SACCZ hill_1 12L    2L         6     6         3 0.156

#no significant differences

1.11.5 Simspons diversity (q=2)

#test for normality
#2L
tapply(hill2$hill_2, hill2$Zone, shapiro.test) #only normal in SACCZ conduct wilcox

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.89926, p-value = 0.02076
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.75954, p-value = 0.003368
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.95563, p-value = 0.7855
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.89048, p-value = 0.04718
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.76202, p-value = 0.02605

#12L
tapply(hill12$hill_2, hill12$Zone, shapiro.test) #will conduct wilcox as above aren't normal

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.8823, p-value = 0.009249
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.91429, p-value = 0.242
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.7618, p-value = 0.02593
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.87718, p-value = 0.02863
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.87408, p-value = 0.243

hills %>% group_by(Zone) %>% t_test(hill_2~Type, paired=TRUE)  #default conf 0.95 and two sided

## # A tibble: 5 × 9
##   Zone  .y.    group1 group2    n1    n2 statistic    df     p
## * <fct> <chr>  <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl>
## 1 STZ   hill_2 12L    2L         6     6  -0.00345     5 0.997
## 2 SAZ   hill_2 12L    2L        17    17  -1.30       16 0.211
## 3 PFZ   hill_2 12L    2L        12    12   0.523      11 0.612
## 4 AAZ   hill_2 12L    2L        24    24  -0.588      23 0.562
## 5 SACCZ hill_2 12L    2L         6     6  -1.97        5 0.106

hills %>% group_by(Zone) %>% wilcox_test(hill_2~Type, paired=TRUE)

## # A tibble: 5 × 8
##   Zone  .y.    group1 group2    n1    n2 statistic     p
## * <fct> <chr>  <chr>  <chr>  <int> <int>     <dbl> <dbl>
## 1 STZ   hill_2 12L    2L         6     6        10 1    
## 2 SAZ   hill_2 12L    2L        17    17        43 0.12 
## 3 PFZ   hill_2 12L    2L        12    12        46 0.622
## 4 AAZ   hill_2 12L    2L        24    24       132 0.623
## 5 SACCZ hill_2 12L    2L         6     6         3 0.156

#no significant differences

1.12 Indicator species analysis

1.12.1 12L

#12L PA
indic_12L<-t(species12[7:71])
indic_12L[indic_12L>0]<-1 #convert to PA
colnames(indic_12L)<-species12$Species
#indic_12L<-multipatt(indic_12L, samples$Zone[samples$Type=="12L"], func = "IndVal.g",duleg = FALSE, control = how(nperm=9999))
#summary(indic_12L, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)

1.12.2 2L

#PA
indic_2L<-t(species2[7:71])
indic_2L[indic_2L>0]<-1 #convert to PA
colnames(indic_2L)<-species2$Species
#indic_2L<-multipatt(indic_2L, samples$Zone[samples$Type=="2L"], func = "IndVal.g",duleg = FALSE, control = how(nperm=9999))
#summary(indic_2L, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)

1.13 Potential invasive/pest identification

1.13.1 Plot

#select species identified as invasive, pest, or biofouling
Invasives<-species %>% filter(str_detect(Comment, c("^I")))

#shorten lat
samples$Lat<-format(round(as.numeric(samples$Latitude), 3), nsmall=3) 
#samples$Sample1<-gsub('V1_', '', samples$Sample) #remove V1 from sample
samples$Lat<-paste(samples$Sample, samples$Lat, sep="_")
#summarise data
Invasives<-Invasives %>% select(-zOTU, -PercentID, -Assignment, -Comment) %>% group_by(Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
#assign status
Invasives$Status<-if_else(Invasives$Genus=="Amphinema" | Invasives$Genus=="Lizzia" | Invasives$Genus=="Austrominius" | Invasives$Genus=="Coryne" | Invasives$Genus=="Clytia", "Unclear", "NA")
Invasives$Status<-if_else(Invasives$Genus=="Asterias" | Invasives$Genus=="Mytilus", "Invasive to Tasmania", Invasives$Status)
Invasives$Status<-if_else(Invasives$Genus=="Obelia" | Invasives$Genus=="Metacarcinus" |Invasives$Genus=="Bougainvillia" | Invasives$Genus=="Watersipora" | Invasives$Genus=="Patiriella" | Invasives$Genus=="Bugulina" | Invasives$Genus=="Cryptosula", "Introduced to Tasmania", Invasives$Status) 
Invasives$Status<-if_else(Invasives$Genus=="Arachnopusia" | Invasives$Genus=="Ostrea", "Native in Tasmania", Invasives$Status)

#long format for ggplot
LongInvasives<-pivot_longer(Invasives, cols = starts_with("V"), names_to = "Sample", values_to = "Reads")
LongInvasives<-merge(LongInvasives, samples[samples$Method=="eDNA",], all=TRUE)
LongInvasives<-LongInvasives %>% select(Zone, Sample, eDNA_Pair, Type, Lat, Genus, Species, Status, Reads)
#write.xlsx(as.data.frame(LongInvasives), file="OverallInvasivesData.xlsx", sheetName = "RawData") #export raw
LongInvasives$Reads[LongInvasives$Reads=="0"]<-NA #change 0's to NA>nicer on plot

#plot individually for proportion
Amphinema<-droplevels(subset(LongInvasives, Genus=="Amphinema"))
sum(Amphinema$Reads, na.rm=T)

## [1] 18

Amphinema$Proportion<-(Amphinema$Reads/18)*100 #calculation proportion reads per sample

Arach<-droplevels(subset(LongInvasives, Genus=="Arachnopusia"))
sum(Arach$Reads, na.rm=T)

## [1] 29

Arach$Proportion<-(Arach$Reads/29)*100 #calculation proportion reads per sample

Aster<-droplevels(subset(LongInvasives, Genus=="Asterias"))
sum(Aster$Reads, na.rm=T)

## [1] 78

Aster$Proportion<-(Aster$Reads/78)*100 #calculation proportion reads per sample

Austrominius<-droplevels(subset(LongInvasives, Genus=="Austrominius"))
sum(Austrominius$Reads, na.rm=T)

## [1] 121

Austrominius$Proportion<-(Austrominius$Reads/121)*100 #calculation proportion reads per sample

Bougainvillia<-droplevels(subset(LongInvasives, Genus=="Bougainvillia"))
sum(Bougainvillia$Reads, na.rm=T)

## [1] 1567

Bougainvillia$Proportion<-(Bougainvillia$Reads/1567)*100 #calculation proportion reads per sample

BugFlab<-droplevels(subset(LongInvasives, Genus=="Bugulina"))
sum(BugFlab$Reads, na.rm=T)

## [1] 72755

BugFlab$Proportion<-(BugFlab$Reads/72755)*100 #calculation proportion reads per sample

Clytia<-droplevels(subset(LongInvasives, Genus=="Clytia"))
sum(Clytia$Reads, na.rm=T)

## [1] 20

Clytia$Proportion<-(Clytia$Reads/20)*100 #calculation proportion reads per sample

Coryne<-droplevels(subset(LongInvasives, Genus=="Coryne"))
sum(Coryne$Reads, na.rm=T)

## [1] 5908

Coryne$Proportion<-(Coryne$Reads/5908)*100 #calculation proportion reads per sample

Cryp<-droplevels(subset(LongInvasives, Genus=="Cryptosula"))
sum(Cryp$Reads, na.rm=T)

## [1] 69

Cryp$Proportion<-(Cryp$Reads/69)*100 #calculation proportion reads per sample

Lizzia<-droplevels(subset(LongInvasives, Genus=="Lizzia"))
sum(Lizzia$Reads, na.rm=T)

## [1] 287

Lizzia$Proportion<-(Lizzia$Reads/287)*100 #calculation proportion reads per sample

Metacarcinus<-droplevels(subset(LongInvasives, Genus=="Metacarcinus"))
sum(Metacarcinus$Reads, na.rm=T)

## [1] 5

Metacarcinus$Proportion<-(Metacarcinus$Reads/5)*100 #calculation proportion reads per sample
#low read numbers: the sample had 16130 reads>using species specific would be better but impressive to still detect low read numbers

Mytilus<-droplevels(subset(LongInvasives, Genus=="Mytilus"))
sum(Mytilus$Reads, na.rm=T)

## [1] 22

Mytilus$Proportion<-(Mytilus$Reads/22)*100 #calculation proportion reads per sample

Obelia<-droplevels(subset(LongInvasives, Genus=="Obelia"))
sum(Obelia$Reads, na.rm=T)

## [1] 1143

Obelia$Proportion<-(Obelia$Reads/1143)*100 #calculation proportion reads per sample

Ostrea<-droplevels(subset(LongInvasives, Genus=="Ostrea"))
sum(Ostrea$Reads, na.rm=T)

## [1] 314

Ostrea$Proportion<-(Ostrea$Reads/314)*100 #calculation proportion reads per sample

Patiriella<-droplevels(subset(LongInvasives, Genus=="Patiriella"))
sum(Patiriella$Reads, na.rm=T)

## [1] 26

Patiriella$Proportion<-(Patiriella$Reads/26)*100 #calculation proportion reads per sample

Water<-droplevels(subset(LongInvasives, Genus=="Watersipora"))
sum(Water$Reads, na.rm=T)

## [1] 154

Water$Proportion<-(Water$Reads/154)*100 #calculation proportion reads per sample


#plot time!
#figures not displayed as they warp
ggplot(Amphinema,aes(x=factor(Amphinema$eDNA_Pair, levels=unique(Amphinema$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "Amphinema.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Arach,aes(x=factor(Arach$eDNA_Pair, levels=unique(Arach$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        panel.grid.major.x = element_blank(),
        #panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "Arach.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Aster,aes(x=factor(Aster$eDNA_Pair, levels=unique(Aster$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "Aster.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Austrominius,aes(x=factor(Austrominius$eDNA_Pair, levels=unique(Austrominius$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        panel.grid.major.x = element_blank(),
        # panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        axis.text.y = element_blank(),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "Austro.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Bougainvillia,aes(x=factor(Bougainvillia$eDNA_Pair, levels=unique(Bougainvillia$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "Boug.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(BugFlab,aes(x=factor(BugFlab$eDNA_Pair, levels=unique(BugFlab$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        #panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "bugflab.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Clytia,aes(x=factor(Clytia$eDNA_Pair, levels=unique(Clytia$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "clytia.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Coryne,aes(x=factor(Coryne$eDNA_Pair, levels=unique(Coryne$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        #  panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "coryne.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Cryp,aes(x=factor(Cryp$eDNA_Pair, levels=unique(Cryp$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "cryp.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Lizzia,aes(x=factor(Lizzia$eDNA_Pair, levels=unique(Lizzia$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        #panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "lizzia.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Metacarcinus,aes(x=factor(Metacarcinus$eDNA_Pair, levels=unique(Metacarcinus$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "Metac.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Mytilus,aes(x=factor(Mytilus$eDNA_Pair, levels=unique(Mytilus$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        #  panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "Mytilus.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Obelia,aes(x=factor(Obelia$eDNA_Pair, levels=unique(Obelia$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "obelia.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Ostrea,aes(x=factor(Ostrea$eDNA_Pair, levels=unique(Ostrea$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        #  panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "ostrea.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Patiriella,aes(x=factor(Patiriella$eDNA_Pair, levels=unique(Patiriella$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        axis.text.x = element_blank())+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "patir.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

ggplot(Water,aes(x=factor(Water$eDNA_Pair, levels=unique(Water$eDNA_Pair)), y=Type))+
  geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
  theme_bw() +
  theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.major.x = element_blank(),
        #   panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
        panel.border = element_blank(),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line(colour="#eeeeee"),
        #axis.text.x = element_blank())+
        axis.text.x = element_text(colour="black", size = 8, angle = 90))+
  scale_x_discrete(expand = c(0.03, 0))+
  scale_colour_manual(values=InvasivesCol)+
  scale_fill_manual(values=InvasivesCol)+
  scale_shape_manual(values=eDNAshape)+
  scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12))

#ggsave(filename = "watersip.jpeg", plot = last_plot(),  width = 18, height = 2.7, units = "cm", dpi = 600)

1.13.2 Summary and statistics

#make summary table
InvasiveSummary<-Invasives
InvasiveSummary$Sum<-rowSums(InvasiveSummary[7:136])
InvasiveSummary<-InvasiveSummary %>% select(-starts_with("V")) 
#write.xlsx(as.data.frame(InvasiveSummary), file="OverallInvasivesData.xlsx", sheetName = "OverallSummary", append = TRUE)
##write.csv(InvasiveSummary, "InvasiveSummary.csv")


#compare detections between 12L and 2L
InvasivesComparison<-LongInvasives %>% select(-Lat, -Genus, -Status)  %>% group_by(Zone, Sample, eDNA_Pair, Type, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
#total reads per volume
sum(InvasivesComparison$Reads[InvasivesComparison$Type=="12L"])

## [1] 75866

sum(InvasivesComparison$Reads[InvasivesComparison$Type=="2L"])

## [1] 6650

#t-test for reads
ttestinv<-InvasivesComparison %>% group_by(Sample, eDNA_Pair, Type) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
wilcox.test(ttestinv$Reads~ttestinv$Type, mu=0, alt="two.sided", 
       conf.level=0.95, paired=TRUE)

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  ttestinv$Reads by ttestinv$Type
## V = 560.5, p-value = 0.005743
## alternative hypothesis: true location shift is not equal to 0

tapply(ttestinv$Reads,ttestinv$Type,median)

## 12L  2L 
##   6  28

# tapply(InvasivesComparison$Reads,InvasivesComparison$Type,shapiro.test) #not normal
# wilcox.test(InvasivesComparison$Reads~InvasivesComparison$Type, mu=0, alt="two.sided", 
#        conf.level=0.95, paired=TRUE)
# tapply(InvasivesComparison$Reads,InvasivesComparison$Type,median)
#occurences
InvasivesComparison$Frequency<-0
InvasivesComparison$Frequency[InvasivesComparison$Reads>0]<-1

#how many total detections along the transect per volume
InvasivesVolume<-as.data.frame(with(InvasivesComparison, table(Type, Frequency)))
InvasivesVolume<-droplevels(subset(InvasivesVolume, Frequency==1))
InvasivesVolume

##   Type Frequency Freq
## 3  12L         1  140
## 4   2L         1  180

#Zone and vol frequency
InvasivesVolumeZone<-as.data.frame(with(InvasivesComparison, table(Type, Zone, Frequency)))
InvasivesVolumeZone<-droplevels(subset(InvasivesVolumeZone, Frequency==1))
InvasivesVolumeZone

##    Type  Zone Frequency Freq
## 11  12L   AAZ         1   51
## 12   2L   AAZ         1   75
## 13  12L   PFZ         1   13
## 14   2L   PFZ         1   18
## 15  12L SACCZ         1    9
## 16   2L SACCZ         1   19
## 17  12L   SAZ         1   33
## 18   2L   SAZ         1   38
## 19  12L   STZ         1   34
## 20   2L   STZ         1   30

#number of unique species detected with each volume
InvasivesComparison<-InvasivesComparison %>% group_by(Type, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
InvasivesComparison$PA[InvasivesComparison$Frequency>0]<-1
with(InvasivesComparison, table
     (Type, PA))

##      PA
## Type   1
##   12L 16
##   2L  14

#two less species detected with 2L samples
#they are:
unique(InvasivesComparison$Species[InvasivesComparison$Reads=="0"])

## [1] "Metacarcinus novaezelandiae" "Patiriella regularis"

#seastar and crab, 'larger' taxa both detected in the first sample/as we depart>in derwent
#these are paired samples 2 & 3

#sums and metazoan proportions
#12l
sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$eDNA_Pair=="1"])

## [1] 16130

sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$eDNA_Pair=="1" & long_raw$Subkingdom=="Metazoa"])/sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$eDNA_Pair=="1"])

## [1] 0.7293862

#2L
sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$eDNA_Pair=="1"])

## [1] 8229

sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$eDNA_Pair=="1" & long_raw$Subkingdom=="Metazoa"])/sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$eDNA_Pair=="1"])

## [1] 0.5698141

#12L had higher read numbers (16130) and proportion of metazoan reads (72.94%) compared to 2L (8229, 56.98%)
#again more than likely due to the filter type

#interestingly, although 2L less species, it detected 'invasives' more frequently>so perhaps smaller filter size is better for small-bodied?

1.14 Analysis without invasives

1.14.1 Hills numbers

1.14.1.1 Plot

#select species identified as invasive, pest, or biofouling
spcs_noinv<-species[!species$Species %in% Invasives$Species,]

#merge species assignments by species (i.e unique)
spcs_noinvmerged<-spcs_noinv %>% select(-zOTU, -Comment, -PercentID, -Assignment)
spcs_noinvmerged<-spcs_noinvmerged %>% group_by(Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
#12L only
spcs_noinv12<-cbind(spcs_noinvmerged[1:6], spcs_noinvmerged[as.character(samples$Sample[samples$Type=="12L"])])
spcs_noinv12<-spcs_noinv12[rowSums(spcs_noinv12[,7:71])!=0,]
#2L only
spcs_noinv2<-cbind(spcs_noinvmerged[1:6], spcs_noinvmerged[as.character(samples$Sample[samples$Type=="2L"])])
spcs_noinv2<-spcs_noinv2[rowSums(spcs_noinv2[,7:71])!=0,]

#eDNA 12L hills data
hillinv_0 <- hill_div(spcs_noinv12[7:71], qvalue = 0)
hillinv_1 <- hill_div(spcs_noinv12[7:71], qvalue = 1)
hillinv_2 <- hill_div(spcs_noinv12[7:71], qvalue = 2)
hillinv12 <- cbind("Sample"=names(hillinv_0), "Type"="12L", hillinv_0, hillinv_1, hillinv_2) #keep names to check the samples are correct
hillinv12<-merge(samples[samples$Type=="12L",], hillinv12, all=TRUE)
hillinv12 <- hillinv12 %>% mutate_at(c('hillinv_0', 'hillinv_1', 'hillinv_2'), as.numeric)

#2L hills data
hillinv_0 <- hill_div(spcs_noinv2[7:71], qvalue = 0)
hillinv_1 <- hill_div(spcs_noinv2[7:71], qvalue = 1)
hillinv_2 <- hill_div(spcs_noinv2[7:71], qvalue = 2)
hillinv2 <- cbind("Sample"=names(hillinv_0),"Type"="2L", hillinv_0, hillinv_1, hillinv_2) #keep names to check the samples are correct
hillinv2<-merge(samples[samples$Type=="2L",], hillinv2, all=TRUE)
hillinv2 <- hillinv2 %>% mutate_at(c('hillinv_0', 'hillinv_1', 'hillinv_2'), as.numeric)

#merge together
hillsinv<-merge(hillinv12, hillinv2, all=TRUE)
hillsinv$Zone<-factor(hillsinv$Zone, levels=c("STZ", "SAZ", "PFZ","AAZ", "SACCZ"))

#seperate plots for each hillsinv number
plothillinv_0 <- violin.plot(hillsinv, ydata = hillsinv$hillinv_0, col = vennfill, fill = vennfill, ylab = "Hills number (q = 0)", ylow = 0, yhigh = 14)+ theme_light()+
  theme(plot.margin = unit(c(1,1,1,1), "cm"), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
  theme(axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")

plothillinv_1 <- violin.plot(hillsinv, ydata = hillsinv$hillinv_1, col = vennfill, fill = vennfill, ylab = "Hills number (q = 1)", ylow = 0, yhigh = 5.5)+ theme_light()+
  theme(plot.margin = unit(c(1,1,1,1), "cm"), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
  theme(axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")

plothillinv_2 <- violin.plot(hillsinv, ydata = hillsinv$hillinv_2, col = vennfill, fill = vennfill, ylab = "Hills number (q = 2)", ylow = 0, yhigh = 4.5)+ theme_light()+
  theme(plot.margin = unit(c(1,1,1,1), "cm"), axis.text.x = element_blank(), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
  theme(axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")

#combine into one plot and...plot
hillsinvplot<-plot_grid(plothillinv_0, plothillinv_1, plothillinv_2, ncol=1, labels = c("A", "B", "C"), label_size = 25)
plot(hillsinvplot)

#tidy env
rm(hillinv_0, hillinv_1, hillinv_2)

1.14.1.2 Summary stats & t-tests

#select hillsinv numbers
hillsinvummstat<-droplevels(subset(hillsinv, hillinv_1!="NA"))
hillsinvummstat<-hillsinvummstat %>% select(Type, Zone, hillinv_0, hillinv_1, hillinv_2)
#summary stats function
hillsinvummstat<-summaryBy(hillsinvummstat[,3:5]~Zone & Type,data=hillsinvummstat,FUN=function(x) 
  + {c(mean=mean(x),sum=sum(x),se=sd(x)/sqrt(length(x)))})
#pivot table longer
hillsinvummstat<-hillsinvummstat %>% pivot_longer(!c(Zone, Type), names_to = c("hill",".value"),  names_pattern = "(\\d*).([A-Za-z]+)", names_prefix = "hillinv_")
hillsinvummstat

## # A tibble: 30 × 6
##    Zone  Type  hill   mean    sum    se
##    <fct> <chr> <chr> <dbl>  <dbl> <dbl>
##  1 STZ   12L   0      7     42    1.44 
##  2 STZ   12L   1      1.57   9.40 0.269
##  3 STZ   12L   2      1.31   7.88 0.184
##  4 STZ   2L    0      3.17  19    1.38 
##  5 STZ   2L    1      1.66   9.98 0.445
##  6 STZ   2L    2      1.45   8.68 0.320
##  7 SAZ   12L   0      6.06 103    0.458
##  8 SAZ   12L   1      2.43  41.4  0.294
##  9 SAZ   12L   2      2.01  34.1  0.219
## 10 SAZ   2L    0      3.75  60    0.393
## # ℹ 20 more rows

#write.csv(hillsinvummstat, "hillsumstatinv.csv")

1.14.1.3 Richness (q=0)

#test for normality
#2L
tapply(hillinv2$hillinv_0, hillinv2$Zone, shapiro.test) #not normal

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.90965, p-value = 0.03466
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.83643, p-value = 0.02506
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.91393, p-value = 0.4627
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.41746, p-value = 0.0000002785
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.67311, p-value = 0.003179

#12L
tapply(hillinv12$hillinv_0, hillinv12$Zone, shapiro.test) #normal

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.96531, p-value = 0.554
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.94817, p-value = 0.6104
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.83173, p-value = 0.1112
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.93286, p-value = 0.2429
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.87691, p-value = 0.2552

#wilcox
hillsinv %>% group_by(Zone) %>% wilcox_test(hillinv_0~Type, paired=TRUE)

## # A tibble: 5 × 8
##   Zone  .y.       group1 group2    n1    n2 statistic        p
## * <fct> <chr>     <chr>  <chr>  <int> <int>     <dbl>    <dbl>
## 1 STZ   hillinv_0 12L    2L         6     6       21  0.035   
## 2 SAZ   hillinv_0 12L    2L        17    17      105  0.0111  
## 3 PFZ   hillinv_0 12L    2L        12    12       66  0.00362 
## 4 AAZ   hillinv_0 12L    2L        24    24      236. 0.000381
## 5 SACCZ hillinv_0 12L    2L         6     6       16  0.281

hillsinv %>% group_by(Type) %>% wilcox_test(hillinv_0~Zone, paired=FALSE)

## # A tibble: 20 × 10
##    Type  .y.    group1 group2    n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr> <chr>  <chr>  <chr>  <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 12L   hilli… STZ    SAZ        6    17      56.5 7.23e-1 7.23e-1 ns          
##  2 12L   hilli… STZ    PFZ        6    12      25.5 3.45e-1 6.9 e-1 ns          
##  3 12L   hilli… STZ    AAZ        6    24      39.5 9.3 e-2 5.6 e-1 ns          
##  4 12L   hilli… STZ    SACCZ      6     6      26   2.2 e-1 6.6 e-1 ns          
##  5 12L   hilli… SAZ    PFZ       17    12      27.5 9.39e-4 8   e-3 **          
##  6 12L   hilli… SAZ    AAZ       17    24      34.5 6.79e-6 6.79e-5 ****        
##  7 12L   hilli… SAZ    SACCZ     17     6      74.5 1.03e-1 5.6 e-1 ns          
##  8 12L   hilli… PFZ    AAZ       12    24     100   1.39e-1 5.6 e-1 ns          
##  9 12L   hilli… PFZ    SACCZ     12     6      69.5 2   e-3 1.2 e-2 *           
## 10 12L   hilli… AAZ    SACCZ     24     6     142.  3.13e-4 3   e-3 **          
## 11 2L    hilli… STZ    SAZ        6    17      33.5 2.28e-1 6.84e-1 ns          
## 12 2L    hilli… STZ    PFZ        6    12      19   1.14e-1 4.56e-1 ns          
## 13 2L    hilli… STZ    AAZ        6    24      17   4   e-3 2.8 e-2 *           
## 14 2L    hilli… STZ    SACCZ      6     6      12.5 4.05e-1 8.1 e-1 ns          
## 15 2L    hilli… SAZ    PFZ       17    12      30.5 1   e-3 1.3 e-2 *           
## 16 2L    hilli… SAZ    AAZ       17    24      53   5.86e-5 5.86e-4 ***         
## 17 2L    hilli… SAZ    SACCZ     17     6      59.5 5.71e-1 8.1 e-1 ns          
## 18 2L    hilli… PFZ    AAZ       12    24      72.5 1.5 e-2 9.2 e-2 ns          
## 19 2L    hilli… PFZ    SACCZ     12     6      60   2.4 e-2 1.22e-1 ns          
## 20 2L    hilli… AAZ    SACCZ     24     6     130   2   e-3 2   e-2 *

#significantly differs in the AAZ, PFZ, SAZ, and STZ

1.14.1.4 Shannons diversity (q=1)

#test for normality
#2L
tapply(hillinv2$hillinv_1, hillinv2$Zone, shapiro.test) #not normal in STZ or PFZ

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.95746, p-value = 0.3895
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.75436, p-value = 0.002969
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.87849, p-value = 0.2622
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.91645, p-value = 0.1479
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.70463, p-value = 0.006915

tapply(hillinv2$hillinv_1[hillinv2$Zone=="STZ"], hillinv2$Zone[hillinv2$Zone=="STZ"], hist) #bimodal so will not become normal

## $STZ
## $breaks
## [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0
## 
## $counts
## [1] 4 0 1 0 0 1
## 
## $density
## [1] 1.3333333 0.0000000 0.3333333 0.0000000 0.0000000 0.3333333
## 
## $mids
## [1] 1.25 1.75 2.25 2.75 3.25 3.75
## 
## $xname
## [1] "X[[i]]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

tapply(hillinv2$hillinv_1[hillinv2$Zone=="PFZ"], hillinv2$Zone[hillinv2$Zone=="PFZ"], hist) #also fairly blocked

## $PFZ
## $breaks
## [1] 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
## 
## $counts
## [1] 3 5 2 0 0 1 1
## 
## $density
## [1] 0.5000000 0.8333333 0.3333333 0.0000000 0.0000000 0.1666667 0.1666667
## 
## $mids
## [1] 1.75 2.25 2.75 3.25 3.75 4.25 4.75
## 
## $xname
## [1] "X[[i]]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

#12L
tapply(hillinv12$hillinv_1, hillinv12$Zone, shapiro.test) #not normal in SAZ

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.93544, p-value = 0.129
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.95321, p-value = 0.6843
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.83361, p-value = 0.1153
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.89109, p-value = 0.0483
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.79496, p-value = 0.05292

tapply(hillinv12$hillinv_1[hillinv12$Zone=="SAZ"], hillinv12$Zone[hillinv12$Zone=="SAZ"], hist) #will conduct wilcox for not normal zones rather than transform and interpret

## $SAZ
## $breaks
## [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5
## 
## $counts
## [1] 7 0 1 2 3 2 2
## 
## $density
## [1] 0.8235294 0.0000000 0.1176471 0.2352941 0.3529412 0.2352941 0.2352941
## 
## $mids
## [1] 1.25 1.75 2.25 2.75 3.25 3.75 4.25
## 
## $xname
## [1] "X[[i]]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

hillsinv %>% group_by(Zone) %>% t_test(hillinv_1~Type, paired=TRUE)  #default conf 0.95 and two sided

## # A tibble: 5 × 9
##   Zone  .y.       group1 group2    n1    n2 statistic    df     p
## * <fct> <chr>     <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl>
## 1 STZ   hillinv_1 12L    2L         6     6    -0.429     5 0.686
## 2 SAZ   hillinv_1 12L    2L        17    16    -0.164    15 0.872
## 3 PFZ   hillinv_1 12L    2L        12    12     0.986    11 0.345
## 4 AAZ   hillinv_1 12L    2L        24    24     0.912    23 0.371
## 5 SACCZ hillinv_1 12L    2L         6     6    -0.461     5 0.664

hillsinv %>% group_by(Zone) %>% wilcox_test(hillinv_1~Type, paired=TRUE)

## # A tibble: 5 × 8
##   Zone  .y.       group1 group2    n1    n2 statistic     p
## * <fct> <chr>     <chr>  <chr>  <int> <int>     <dbl> <dbl>
## 1 STZ   hillinv_1 12L    2L         6     6        10 1    
## 2 SAZ   hillinv_1 12L    2L        17    16        64 0.86 
## 3 PFZ   hillinv_1 12L    2L        12    12        52 0.339
## 4 AAZ   hillinv_1 12L    2L        24    24       189 0.277
## 5 SACCZ hillinv_1 12L    2L         6     6         8 0.688

#no significant differences

1.14.1.5 Simspons diversity (q=2)

#test for normality
#2L
tapply(hillinv2$hillinv_2, hillinv2$Zone, shapiro.test) #only normal in SAZ+SACCZ conduct wilcox

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.89528, p-value = 0.01711
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.75416, p-value = 0.002955
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.84435, p-value = 0.1416
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.90563, p-value = 0.09896
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.68397, p-value = 0.004172

#12L
tapply(hillinv12$hillinv_2, hillinv12$Zone, shapiro.test) #will conduct wilcox as above aren't normal

## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.88028, p-value = 0.008418
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.91412, p-value = 0.2409
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.73879, p-value = 0.01544
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.87679, p-value = 0.02822
## 
## 
## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.71705, p-value = 0.009302

hillsinv %>% group_by(Zone) %>% wilcox_test(hillinv_2~Type, paired=TRUE)

## # A tibble: 5 × 8
##   Zone  .y.       group1 group2    n1    n2 statistic     p
## * <fct> <chr>     <chr>  <chr>  <int> <int>     <dbl> <dbl>
## 1 STZ   hillinv_2 12L    2L         6     6        10 1    
## 2 SAZ   hillinv_2 12L    2L        17    16        61 0.744
## 3 PFZ   hillinv_2 12L    2L        12    12        51 0.38 
## 4 AAZ   hillinv_2 12L    2L        24    24       156 0.877
## 5 SACCZ hillinv_2 12L    2L         6     6         8 0.688

#no significant differences- same in the STZ

1.14.2 Indicator species analysis

1.14.2.1 12L

#12L PA
indicnoinv_12L<-t(spcs_noinv12[7:71])
indicnoinv_12L[indicnoinv_12L>0]<-1 #convert to PA
colnames(indicnoinv_12L)<-spcs_noinv12$Species
#indicnoinv_12L<-multipatt(indicnoinv_12L, samples$Zone[samples$Type=="12L"], func = "IndVal.g",duleg = FALSE, control = how(nperm=9999))
#summary(indicnoinv_12L, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)

1.14.2.2 2L

#PA
indicnoinv_2L<-t(spcs_noinv2[7:71])
indicnoinv_2L[indicnoinv_2L>0]<-1 #convert to PA
colnames(indicnoinv_2L)<-spcs_noinv2$Species
#indicnoinv_2L<-multipatt(indicnoinv_2L, samples$Zone[samples$Type=="2L"], func = "IndVal.g",duleg = FALSE, control = how(nperm=9999))
#summary(indicnoinv_2L, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)

2 eDNA combined

Combining 12L and 2L and removing hull bound species from analysis.

2.1 Formatting

#remove all besides eDNAdata, eDNAdata_wide, fill12, samples, CPR_eDNAvals, CPRspecies, species_long
rm("Amphinema","Arach", "Aster", "Austrominius", "Bougainvillia", "BugFlab","Clytia", "Coryne", "Cryp", "diversity",  "eDNAshape", "exc_sha", "hill12", "hill2", "hillinv12", "hillinv2", "hills", "hillsinv", "hillsplot", "hillsummstat", "hillsinvplot", "hillsinvummstat", "indic_12L", "indic_2L", "indicnoinv_2L", "indicnoinv_12L","InvasivesCol", "InvasivesComparison", "InvasiveSummary", "InvasivesVolume", "InvasivesVolumeZone", "Lizzia", "long_raw", "LongInvasives", "matchedsamples", "Metacarcinus", "mysamples", "Mytilus", "Obelia", "Ostrea", "Patiriella", "plothill_0", "plothill_1", "plothill_2", "plothillinv_0","plothillinv_1", "plothillinv_2", "raweDNA", "rawfill", "rawsummary", "Richness", "spcs_noinv", "spcs_noinv12", "spcs_noinv2", "species_freqplot", "spcs_noinvmerged","species", "species12", "species2", "SpeciesFreq", "speciesmerged","standard_error", "ttestinv", "venn_plot", "Venn12", "Venn2", "vennfill", "Water")

#combine 2 and 12L
eDNAmerged<-eDNAdata %>% select(-Sample, -Type, -Metazoan, -PercentID)
eDNAmerged<-eDNAmerged %>% group_by(Method, eDNA_Pair, Zone, CPR_Transect, Latitude, Longitude, CPRSubset, zOTU, Subkingdom, Phylum, Class, Order, Family, Genus, Species, Comment, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)
eDNAmerged$eDNA_Pair<-paste("eDNA",eDNAmerged$eDNA_Pair, sep = "")
eDNAmerged<-eDNAmerged %>% group_by(Method, eDNA_Pair, Zone, CPR_Transect, Latitude, Longitude, CPRSubset, zOTU, Subkingdom,Phylum, Class,Order,Family,Genus, Species,Comment,Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)

#remove invasives/hull species
eDNAmerged<-eDNAmerged[!eDNAmerged$Species %in% Invasives$Species,]

#samples merged by pair
samples_paired<-droplevels(subset(samples, eDNA_Pair!="NA"))
samples_paired<-samples_paired %>% select(-Type,-Sample, -Lat)
samples_paired<-samples_paired %>% distinct() 
samples_paired$eDNA_Pair<-paste("eDNA",samples_paired$eDNA_Pair, sep = "")

2.2 Summary statistics

#summary stats 
sum(eDNAmerged$Reads)

## [1] 1057322

#1139972 prior, 1057322 remaining, 82650 reads assigned to hull species

#of the remaining reads, these were assigned to:
sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Species"]) #1036536 98.03%

## [1] 1036536

sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Genus"]) #4635 0.004%

## [1] 4635

sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Family"]) #15584 0.015%

## [1] 15584

sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Order"]) #429 0.0004%

## [1] 429

sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Class"]) #16 1.513257e-05%

## [1] 16

sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Phyla"]) #122 0.0001153859%

## [1] 122

#zotus assigned to species
length(unique(eDNAmerged$zOTU[eDNAmerged$Assignment=="Species"]))/length(unique(eDNAmerged$zOTU)) #76.6%

## [1] 0.7558442

#species assignments
species<-droplevels(subset(eDNAmerged, Assignment=="Species"))
length(unique(species$Species)) #52 species detected

## [1] 52

length(unique(species$Genus)) #from 38 genera

## [1] 38

length(unique(species$Family)) #and 29 families

## [1] 29

length(unique(species$Order)) #and 19  orders

## [1] 19

2.3 Diversity across zones

#select data and convert abundance to PA
diversity<-species %>% select(-Comment, -zOTU) %>% group_by(Zone, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
diversity$FOO[diversity$Reads>0]<-1

#order zones
diversity$Zone<-factor(diversity$Zone, levels = c("STZ", "SAZ", "PFZ", "AAZ", "SACCZ"))

ggplot(data=diversity, aes(fill=Phylum, y=FOO, x=Zone)) +
  geom_bar(stat="identity", width = 0.97) + 
  scale_fill_manual(values = fill12) +
  theme(axis.line = element_line(colour = "black", size = 0.4, linetype = "solid"),
        axis.ticks = element_line(colour = "black", size = 0.4, linetype = "solid"), 
        axis.text.x = element_text(size=11,vjust = 0.5, hjust=0.5, colour="black"),
        axis.text.y = element_text(size=16, vjust = 0.5, hjust=1,colour="black"),
        axis.title.y = element_text(face="bold", size=15, vjust=3),
        strip.text.x = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(face="bold", size=13,vjust = -2),
        legend.text = element_text(size = 10), legend.title = element_text(size=13, face="bold"),
        legend.key.height = unit(0.2, 'cm'), legend.key.width = unit(0.3, 'cm'),
        plot.margin =  margin(20,20,30,20)) + scale_y_continuous(expand = c(0, 0))+ 
  #scale_x_discrete(labels=stringr::str_wrap(c("12L"="LargeVF", "2L"="SmallVF"),width=5), expand = c(0, 0))+
  guides(fill=guide_legend(byrow = TRUE, title="Phylum"))+
  labs(x="Southern Ocean zones", y="Species diversity")

#ggsave(filename = "CPR_eDNA_diversityzones.jpeg", plot = last_plot(),  width = 24, height = 14, units = "cm", dpi = 600)

#as table
with(species_long, table(Phylum, Zone, Type))

## , , Type = 12L
## 
##                Zone
## Phylum           STZ  SAZ  PFZ  AAZ SACCZ
##   Annelida         0    0    0    0     8
##   Arthropoda     483 1450  298  665    53
##   Bryozoa         12    9    3   20     4
##   Chaetognatha     0    3    1    3     0
##   Chordata         2   10    3    6     0
##   Cnidaria        39   27   11   50     6
##   Echinodermata    7    4    0    4     3
##   Mollusca         4    3   12   23     0
##   Nemertea         0    0    0    0     0
##   Porifera         0    0    0    0     0
## 
## , , Type = 2L
## 
##                Zone
## Phylum           STZ  SAZ  PFZ  AAZ SACCZ
##   Annelida         0    0    0    0     4
##   Arthropoda      61  146  139  398    24
##   Bryozoa         16   23    3   29    11
##   Chaetognatha     0    1    2    3     0
##   Chordata         1    1    0    2     0
##   Cnidaria        35   29   20   98    17
##   Echinodermata    6    0    0    1     1
##   Mollusca         2    2    8   25     1
##   Nemertea         0    0    0    0     1
##   Porifera         2    0    0    1     0

2.4 Hills numbers

2.4.1 Plot

#merge species assignments by species (i.e unique)
spcs_merged<-species %>% ungroup() %>% select(-zOTU, -Method, -Assignment, -CPR_Transect, -Latitude, -Longitude, -CPRSubset) %>% group_by(eDNA_Pair, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)

#wide format species
spcs_wide<-pivot_wider(spcs_merged, names_from = eDNA_Pair, values_from = Reads, values_fill = 0)
#hills data
hill0 <- hill_div(spcs_wide[7:71], qvalue = 0)
hill1 <- hill_div(spcs_wide[7:71], qvalue = 1)
hill2 <- hill_div(spcs_wide[7:71], qvalue = 2)
hill <- cbind.data.frame("eDNA_Pair"=names(hill0), hill0, hill1, hill2)
hill<-left_join(hill, samples_paired)
hill <- hill %>% mutate_at(c('hill0', 'hill1', 'hill2'), as.numeric)

#order zones 
hill$Zone<-factor(hill$Zone, levels=c("STZ", "SAZ", "PFZ","AAZ", "SACCZ"))

#seperate plots for each hillsinv number

plot0<- violin.plot2(hill, ydata = hill$hill0, col = zonefill, fill = zonefill, ylab = "Hill number (q = 0)", ylow = 0, yhigh = 14)+ theme_light()+
  theme(plot.margin = unit(c(1,1,1,1), "cm"), legend.position="none", axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))

plot1<- violin.plot2(hill, ydata = hill$hill1, col = zonefill, fill = zonefill, ylab = "Hill number (q = 1)", ylow = 0, yhigh = 5.5)+ theme_light()+
  theme(plot.margin = unit(c(1,1,1,1), "cm"), legend.position="none", axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))

plot2<- violin.plot2(hill, ydata = hill$hill2, col = zonefill, fill = zonefill, ylab = "Hill number (q = 2)", ylow = 0, yhigh = 4.5)+ theme_light()+
  theme(plot.margin = unit(c(1,1,1,1), "cm"), legend.position="none", axis.text.x = element_text(size=16, colour="black"),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))

#combine into one plot and...plot
hillplot<-plot_grid(plot0, plot1, plot2, ncol=1, labels = c("A", "B", "C"), label_size = 25)
plot(hillplot)

#get legend
legend<-violin.plot2(hill, ydata = hill$hill2, col = zonefill, fill = zonefill, ylab = "hill number (q = 2)", ylow = 0, yhigh = 5)+ theme_light()+
  theme(plot.margin = unit(c(1,1,1,1), "cm"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18), axis.text.x = element_text(size=16, colour="black"),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))
legend<-get_legend(legend) 

#plot heatmap and legend together
plot_grid(hillplot, legend, rel_widths = c(5, 1))

#tidy env
rm(plot0, plot1, plot2)

2.4.2 Summary stats & anova

#summary stats (means se)
#select hills numbers
hillsummstat<-hill %>% select(Zone, hill0, hill1, hill2)
#summary stats function
hillsummstat<-summaryBy(hillsummstat[,2:4]~Zone ,data=hillsummstat,FUN=function(x) 
  + {c(mean=mean(x),sum=sum(x),se=sd(x)/sqrt(length(x)))})
#pivot table longer
hillsummstat<-hillsummstat %>% pivot_longer(!c(Zone), names_to = c("hill",".value"),  names_pattern = "(\\d*).([A-Za-z]+)", names_prefix = "hill")
hillsummstat

## # A tibble: 15 × 5
##    Zone  hill   mean    sum    se
##    <fct> <chr> <dbl>  <dbl> <dbl>
##  1 STZ   0      7.5   45    1.73 
##  2 STZ   1      1.70  10.2  0.343
##  3 STZ   2      1.39   8.33 0.224
##  4 SAZ   0      6.59 112    0.446
##  5 SAZ   1      2.44  41.5  0.292
##  6 SAZ   2      2.00  34.1  0.217
##  7 PFZ   0      9.67 116    0.432
##  8 PFZ   1      2.83  34.0  0.212
##  9 PFZ   2      2.24  26.9  0.172
## 10 AAZ   0     10.5  251    0.356
## 11 AAZ   1      2.70  64.8  0.182
## 12 AAZ   2      1.98  47.4  0.146
## 13 SACCZ 0      5.33  32    0.843
## 14 SACCZ 1      1.61   9.66 0.335
## 15 SACCZ 2      1.43   8.59 0.303

#write.csv(hillsummstat, "hillsummstat_merged.csv")

#tests for normality
tapply(hill$hill0, hill$Zone, shapiro.test) #normal

## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.89303, p-value = 0.3344
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.97126, p-value = 0.8402
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.95309, p-value = 0.6825
## 
## 
## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.91605, p-value = 0.04779
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.91824, p-value = 0.4928

tapply(hill$hill1, hill$Zone, shapiro.test) #normal

## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.81515, p-value = 0.08009
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.89414, p-value = 0.05425
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.9524, p-value = 0.6722
## 
## 
## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.9705, p-value = 0.6794
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.77812, p-value = 0.03702

tapply(hill$hill2, hill$Zone, shapiro.test) #not normal

## $STZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.75516, p-value = 0.02237
## 
## 
## $SAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.87902, p-value = 0.03066
## 
## 
## $PFZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.91424, p-value = 0.2417
## 
## 
## $AAZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.8801, p-value = 0.00835
## 
## 
## $SACCZ
## 
##  Shapiro-Wilk normality test
## 
## data:  X[[i]]
## W = 0.66766, p-value = 0.00277

hist(hill$hill2) #positive skew

#variance
library(car)
leveneTest(hill0~Zone, data=hill) #not equal

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value   Pr(>F)   
## group  4  3.9689 0.006359 **
##       60                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

leveneTest(hill1~Zone, data=hill) # equal

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  1.8347 0.1339
##       60

leveneTest(hill2~Zone, data=hill) # equal

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  4  1.5555 0.1979
##       60

#kruskal test
kruskal.test(hill0~Zone, data=hill)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  hill0 by Zone
## Kruskal-Wallis chi-squared = 30.693, df = 4, p-value = 0.000003536

kruskal.test(hill1~Zone, data=hill)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  hill1 by Zone
## Kruskal-Wallis chi-squared = 11.458, df = 4, p-value = 0.02187

kruskal.test(hill2~Zone, data=hill)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  hill2 by Zone
## Kruskal-Wallis chi-squared = 10.862, df = 4, p-value = 0.02816

#anova for hill1
hill1lm<-lm(hill1~Zone, data=hill)
anova(hill1lm)

## Analysis of Variance Table
## 
## Response: hill1
##           Df Sum Sq Mean Sq F value  Pr(>F)  
## Zone       4 10.812 2.70294  2.9818 0.02595 *
## Residuals 60 54.389 0.90648                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#similar p value use kruskal for all for consistencys

2.5 Indicator species analysis

#12L PA
indic<-t(spcs_wide[7:71])
indic[indic>0]<-1 #convert to PA
colnames(indic)<-spcs_wide$Species
#indic<-multipatt(indic, samples_paired$Zone, func = "IndVal.g",duleg = FALSE, control = how(nperm=9999))
#summary(indic, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)

#takes ages to run so pasted below instead

# Multilevel pattern analysis
#  ---------------------------
# 
#  Association function: IndVal.g
#  Significance level (alpha): 0.05
# 
#  Total number of species: 52
#  Selected number of species: 16 
#  Number of species associated to 1 group: 6 
#  Number of species associated to 2 groups: 5 
#  Number of species associated to 3 groups: 5 
#  Number of species associated to 4 groups: 0 
# 
#  List of species associated to each combination: 
# 
#  Group PFZ  #sps.  1 
#                         A      B  stat p.value  
# Vibilia antarctica 0.7273 0.3333 0.492  0.0474 *
# 
#  Group SACCZ  #sps.  4 
#                            A      B  stat p.value   
# Stephos longipes      0.6667 0.5000 0.577  0.0085 **
# Polyeunoa laevis      1.0000 0.3333 0.577  0.0148 * 
# Sterechinus neumayeri 1.0000 0.3333 0.577  0.0148 * 
# Nematoscelis megalops 0.8889 0.3333 0.544  0.0190 * 
# 
#  Group STZ  #sps.  1 
#                       A      B  stat p.value  
# Evadne nordmanni 0.7391 0.3333 0.496  0.0404 *
# 
#  Group AAZ+PFZ  #sps.  3 
#                         A      B  stat p.value    
# Calanoides acutus  0.8815 0.8611 0.871   1e-04 ***
# Rhincalanus gigas  0.9233 0.6667 0.785   2e-04 ***
# Calanus propinquus 0.8281 0.7222 0.773   7e-04 ***
# 
#  Group AAZ+SACCZ  #sps.  1 
#                          A      B  stat p.value   
# Thysanoessa macrura 0.9200 0.5667 0.722  0.0011 **
# 
#  Group SAZ+STZ  #sps.  1 
#                           A      B  stat p.value  
# Euphausia vallentini 0.8223 0.3478 0.535   0.045 *
# 
#  Group AAZ+PFZ+SAZ  #sps.  2 
#                                   A      B  stat p.value    
# Calanus propinquus/similimus 1.0000 0.8113 0.901   1e-04 ***
# Metridia lucens              0.9321 0.7358 0.828   8e-04 ***
# 
#  Group SACCZ+SAZ+STZ  #sps.  3 
#                             A      B  stat p.value    
# Neocalanus tonsus      0.8265 0.8276 0.827   1e-04 ***
# Clausocalanus brevipes 0.8674 0.4828 0.647   1e-02 ** 
# Clausocalanus pergens  1.0000 0.2414 0.491   5e-02 *  
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

2.6 PERMANOVA

#make a lat5
Lat5<-read.csv("sampslat5.csv")
Lat5<-Lat5 %>% select(-Zone)
Lat5<-left_join(Lat5, samples, by="Sample")

Lat5$eDNA_Pair<-paste("eDNA",Lat5$eDNA_Pair, sep = "")
Lat5<-Lat5 %>% select(eDNA_Pair, Lat5) %>% distinct()

samples_paired<-left_join(samples_paired, Lat5, by="eDNA_Pair")

#convert to PA
PA_matrix<-spcs_wide %>% ungroup() %>% select(-Phylum, -Class, -Order, -Family, -Genus)
PA_matrix<-pivot_longer(PA_matrix, cols = 2:66, names_to = "eDNA_Pair", values_to = "PA")
PA_matrix<-pivot_wider(PA_matrix, names_from = Species, values_from = PA, values_fill = 0)
PA_matrix<-as.data.frame(PA_matrix)
rownames(PA_matrix)<-PA_matrix$eDNA_Pair
PA_matrix<-PA_matrix %>% select(-eDNA_Pair)

adonis2(PA_matrix ~ Zone, data=samples_paired, method="jaccard", permutations=9999)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
## 
## adonis2(formula = PA_matrix ~ Zone, data = samples_paired, permutations = 9999, method = "jaccard")
##          Df SumOfSqs      R2      F Pr(>F)    
## Zone      4   3.9924 0.16406 2.9439 0.0001 ***
## Residual 60  20.3423 0.83594                  
## Total    64  24.3347 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.7 nMDS

#get jaccard matrix for nMDS plot
jaccard<-vegdist(PA_matrix, method = "jaccard")
jaccard<-as.matrix(jaccard, labels=TRUE)

#run nMDS
nMDS <- metaMDS(jaccard, k=3, distance = "jaccard", autotransform = FALSE, trymax = 10000)

## Run 0 stress 0.1079726 
## Run 1 stress 0.1007289 
## ... New best solution
## ... Procrustes: rmse 0.08151526  max resid 0.3444544 
## Run 2 stress 0.1056801 
## Run 3 stress 0.1061836 
## Run 4 stress 0.1050821 
## Run 5 stress 0.1025566 
## Run 6 stress 0.103335 
## Run 7 stress 0.1027071 
## Run 8 stress 0.1046893 
## Run 9 stress 0.1042888 
## Run 10 stress 0.1066247 
## Run 11 stress 0.1081116 
## Run 12 stress 0.1027057 
## Run 13 stress 0.1123301 
## Run 14 stress 0.1094867 
## Run 15 stress 0.1097408 
## Run 16 stress 0.1032063 
## Run 17 stress 0.1007189 
## ... New best solution
## ... Procrustes: rmse 0.001499893  max resid 0.007132327 
## ... Similar to previous best
## Run 18 stress 0.1007191 
## ... Procrustes: rmse 0.00003739295  max resid 0.0001685826 
## ... Similar to previous best
## Run 19 stress 0.1081479 
## Run 20 stress 0.1079959 
## *** Best solution repeated 2 times

#stress=0.1007196
nMDS <- metaMDS(jaccard, previous = nMDS)

## Starting from 3-dimensional configuration
## Stress differs from 'previous.best': reset tries
## Run 0 stress 0.1980241 
## Run 1 stress 0.1513518 
## ... New best solution
## ... Procrustes: rmse 0.08573331  max resid 0.3310584 
## Run 2 stress 0.1583389 
## Run 3 stress 0.1474427 
## ... New best solution
## ... Procrustes: rmse 0.06298191  max resid 0.4237687 
## Run 4 stress 0.1450595 
## ... New best solution
## ... Procrustes: rmse 0.06172505  max resid 0.2872753 
## Run 5 stress 0.1527898 
## Run 6 stress 0.1425149 
## ... New best solution
## ... Procrustes: rmse 0.03694813  max resid 0.2233 
## Run 7 stress 0.1726683 
## Run 8 stress 0.1603489 
## Run 9 stress 0.1485234 
## Run 10 stress 0.1628316 
## Run 11 stress 0.1425145 
## ... New best solution
## ... Procrustes: rmse 0.0001571397  max resid 0.001135846 
## ... Similar to previous best
## Run 12 stress 0.1672463 
## Run 13 stress 0.1639642 
## Run 14 stress 0.1436714 
## Run 15 stress 0.1568015 
## Run 16 stress 0.1645933 
## Run 17 stress 0.1589634 
## Run 18 stress 0.1580686 
## Run 19 stress 0.1585105 
## Run 20 stress 0.1502189 
## *** Best solution repeated 1 times

# Next, extract the NMDS scores
scrs <- scores(nMDS, display = 'sites')
scrs<-as.data.frame(scrs)
scrs$eDNA_Pair<-rownames(scrs)
scrs <- merge(scrs, samples_paired, all=TRUE)

# Now we compute the group centroids/mean of each zone:
cent <- aggregate(cbind(NMDS1, NMDS2) ~ Zone, data = scrs, FUN = mean)

# To draw the spider, we need to us geom_segment() which requires coordiates to draw the segment from and to our centroids. So we need to replicate the group centroid for each observation in the group. This we facilitate by a left join via merge:
segs <- merge(scrs, setNames(cent, c('Zone','oNMDS1','oNMDS2')),
              by = 'Zone', sort = FALSE)
#rename the columns in cent eg oNMDS1 so these don't get confused for columns of the same names in scrs — we want these centroid variables to have different names

#plot
ggplot(scrs, aes(x = NMDS1, y = NMDS2, colour = Zone)) +
  geom_segment(data = segs, mapping = aes(xend = oNMDS1, yend = oNMDS2)) + # spiders
  geom_point(data = cent, size = 5, aes(fill=Zone), colour="black", stroke=0.2, shape=21) +    # centroids
  geom_point(data=scrs, size=5, colour="black", aes(fill=Zone), stroke=0.2, shape=21) +
  #geom_point(data=scrs, size=4, aes(fill=Zone,colour=Zone)) +      # sample scores
  scale_fill_manual(values=zonefill)+
  scale_colour_manual(values=zonefill)+
  #scale_shape_manual(values=c(21))+
  theme(axis.text.x = element_text(size=18,colour="black"),
        axis.text.y = element_text(size=18,colour="black"),
        axis.title.y = element_text(size=18, vjust=3, face="bold"),
        axis.title.x = element_text(size=18,vjust = -2, face="bold"),
        panel.grid = element_blank(),
        axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
        panel.background = element_rect(fill='transparent'), #transparent panel bg
        plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
        panel.grid.major = element_blank(), #remove major gridlines
        legend.background = element_rect(fill='transparent'), #transparent legend bg
        legend.box.background = element_rect(fill='transparent'),
         plot.margin =  margin(20,20,30,20),
        panel.grid.minor = element_blank(), #remove minor gridlines
        panel.border = element_blank())  +
  geom_text(label=scrs$eDNA_Pair,
    check_overlap = T)

ggsave(filename = "ggborders1.png", plot = last_plot(), bg='transparent', width = 34, height = 18, units = "cm", dpi = 600)

# #obtain surf data
# species.scores1 <- as.data.frame(scores(nMDS, "species"))
# species.scores1$species <- rownames(species.scores1)
# names(species.scores1)[c(1, 2)] <- c("x", "y")
# species.scores1$z <- NA
# 
# head(species.scores1)

newsf <- ordisurf(nMDS ~ as.numeric(samples_paired$Latitude), plot = FALSE, scaling = 3)
head(newsf)

## $coefficients
## (Intercept)  s(x1,x2).1  s(x1,x2).2  s(x1,x2).3  s(x1,x2).4  s(x1,x2).5 
## -54.2160062   3.3105151   3.4175740  -2.5537027   0.7469594   1.6357877 
##  s(x1,x2).6  s(x1,x2).7  s(x1,x2).8  s(x1,x2).9 
##  -0.7631287  -4.4193811  -1.4355797   3.0602435 
## 
## $residuals
##            1            2            3            4            5            6 
##   2.73373948  12.31132221   3.80505891   5.26650986   7.32699990   1.62833176 
##            7            8            9           10           11           12 
##  -0.33713747   3.27715463   0.07237837   3.16888039   1.03285881   0.78393704 
##           13           14           15           16           17           18 
##   0.74185857   1.93967144  -1.63873930   5.98502696   3.35308168   2.09012973 
##           19           20           21           22           23           24 
##   1.54824317   4.25622029   0.31466661   6.69537612   5.44395990   2.23598993 
##           25           26           27           28           29           30 
##   1.71665024   2.41908146   3.64889197   2.14559315   0.44016165   2.07047101 
##           31           32           33           34           35           36 
##   1.03842776   1.81790937   4.38390540   1.05545414   0.77976891  -0.12823240 
##           37           38           39           40           41           42 
##  -0.44859529  -0.93452281  -1.23944470  -2.25983636  -2.27657674  -0.71856240 
##           43           44           45           46           47           48 
##  -2.06161614   1.32399152  -4.87858496  -3.81828066  -2.84863759  -0.28889367 
##           49           50           51           52           53           54 
##   1.89819925   1.05809576   0.74421234  -9.67205533  -4.87599303  -2.31656097 
##           55           56           57           58           59           60 
##  -4.96118410  -6.21158311  -4.62550067  -5.03573498  -5.47235844  -0.76086232 
##           61           62           63           64           65 
##  -2.40627262  -0.45946916  -8.94992402 -11.88724102 -11.03980943 
## 
## $fitted.values
##         1         2         3         4         5         6         7         8 
## -45.61564 -55.19342 -47.93546 -49.61751 -51.85810 -46.79883 -45.78326 -49.55545 
##         9        10        11        12        13        14        15        16 
## -46.46158 -49.73908 -48.29426 -48.23444 -48.54226 -50.15997 -46.74246 -55.12273 
##        17        18        19        20        21        22        23        24 
## -52.64078 -51.58063 -51.65524 -54.40722 -50.75437 -57.61318 -56.73296 -53.64649 
##        25        26        27        28        29        30        31        32 
## -53.30695 -54.44398 -56.19749 -55.62189 -53.67056 -55.35747 -54.53533 -55.93841 
##        33        34        35        36        37        38        39        40 
## -58.64361 -55.48565 -55.33987 -55.03277 -54.82140 -54.56408 -54.68236 -53.94776 
##        41        42        43        44        45        46        47        48 
## -54.14022 -55.85194 -54.90478 -58.58449 -52.45462 -54.58252 -55.30146 -58.02751 
##        49        50        51        52        53        54        55        56 
## -60.30900 -60.05820 -59.88511 -49.75834 -54.87191 -58.07374 -55.91902 -55.19892 
##        57        58        59        60        61        62        63        64 
## -57.76510 -57.53467 -58.10924 -63.51534 -61.90643 -63.96133 -57.21098 -54.44316 
##        65 
## -55.39749 
## 
## $family
## 
## Family: gaussian 
## Link function: identity 
## 
## 
## $linear.predictors
##         1         2         3         4         5         6         7         8 
## -45.61564 -55.19342 -47.93546 -49.61751 -51.85810 -46.79883 -45.78326 -49.55545 
##         9        10        11        12        13        14        15        16 
## -46.46158 -49.73908 -48.29426 -48.23444 -48.54226 -50.15997 -46.74246 -55.12273 
##        17        18        19        20        21        22        23        24 
## -52.64078 -51.58063 -51.65524 -54.40722 -50.75437 -57.61318 -56.73296 -53.64649 
##        25        26        27        28        29        30        31        32 
## -53.30695 -54.44398 -56.19749 -55.62189 -53.67056 -55.35747 -54.53533 -55.93841 
##        33        34        35        36        37        38        39        40 
## -58.64361 -55.48565 -55.33987 -55.03277 -54.82140 -54.56408 -54.68236 -53.94776 
##        41        42        43        44        45        46        47        48 
## -54.14022 -55.85194 -54.90478 -58.58449 -52.45462 -54.58252 -55.30146 -58.02751 
##        49        50        51        52        53        54        55        56 
## -60.30900 -60.05820 -59.88511 -49.75834 -54.87191 -58.07374 -55.91902 -55.19892 
##        57        58        59        60        61        62        63        64 
## -57.76510 -57.53467 -58.10924 -63.51534 -61.90643 -63.96133 -57.21098 -54.44316 
##        65 
## -55.39749 
## 
## $deviance
## [1] 1178.947

extract.xyz <- function(obj) {
  xy <- expand.grid(x = obj$grid$x, y = obj$grid$y)
  xyz <- cbind(xy, c(obj$grid$z))
  names(xyz) <- c("x", "y", "z")
  return(xyz)
}

contour.vals1 <- extract.xyz(obj = newsf)
head(contour.vals1)

##            x          y  z
## 1 -0.5785276 -0.4212773 NA
## 2 -0.5519046 -0.4212773 NA
## 3 -0.5252815 -0.4212773 NA
## 4 -0.4986585 -0.4212773 NA
## 5 -0.4720354 -0.4212773 NA
## 6 -0.4454123 -0.4212773 NA

p1 <- ggplot(data = contour.vals1, aes(x, y, z = z)) + 
  stat_contour(aes(colour = ..level..)) + 
  coord_cartesian(xlim = c(-0.6, 0.2), ylim = c(-0.4, 0.2)) + 
  #scale_shape_manual(values=c(21))+
  theme(axis.text.x = element_text(size=14,colour="black"),
        axis.text.y = element_text(size=14,colour="black"),
        axis.title =  element_text(size=18,colour="black"),
        panel.grid = element_blank(),
        axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
        panel.background = element_rect(fill='transparent'), #transparent panel bg
        plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
        panel.grid.major = element_blank(), #remove major gridlines
        legend.background = element_rect(fill='transparent'), #transparent legend bg
        legend.box.background = element_rect(fill='transparent'),
        panel.grid.minor = element_blank(), #remove minor gridlines
        panel.border = element_blank())
p1

ggsave(filename = "ggsurf.png", plot = last_plot(), bg='transparent', width = 30, height = 18, units = "cm", dpi = 600)

#overlay in photoshop

# spcs_wide<-pivot_wider(spcs_merged, names_from = eDNA_Pair, values_from = Reads, values_fill = 0)
# adonis(Gcomm2 ~ Lat5, data = Gdat2, permutations = 10000)
#make jaccard matrix

3 CPR

3.1 Overview of CPR detections

#raw CPR and metazoan only
CPR<-as.data.frame(read_excel("eDNA_CPR.xlsx",sheet="CPR"))
#total counts
sum(CPR$Sum)

## [1] 55465

#Metazoan/non-metazoan counts/abundance
sum(CPR$Sum[CPR$Subkingdom!="Metazoan"])

## [1] 7183

sum(CPR$Sum[CPR$Subkingdom!="Metazoan"])/sum(CPR$Sum)

## [1] 0.1295051

sum(CPR$Sum[CPR$Subkingdom=="Metazoan"])/sum(CPR$Sum)

## [1] 0.8704949

#drop to metazoan taxa
CPR<-droplevels(subset(CPR, Subkingdom=="Metazoan"))
#Metazoan assignments
table(CPR$Assignment)

## 
##   Class  Family   Genus   Order   Other  Phylum Species 
##       4       4      15       5       1       2      44

#Phyla and classes that were dominant (abundance)
CPR %>% select(Phylum, Class, Sum) %>% group_by(Phylum, Class) %>% summarize_if(is.numeric, sum, na.rm=TRUE) #broad overview

## # A tibble: 13 × 3
## # Groups:   Phylum [6]
##    Phylum       Class          Sum
##    <chr>        <chr>        <dbl>
##  1 Annelida     Polychaeta     113
##  2 Arthropoda   Copepoda     46478
##  3 Arthropoda   Decapoda        19
##  4 Arthropoda   Malacostraca   915
##  5 Arthropoda   Ostracoda       60
##  6 Chaetognatha Sagittoidea     36
##  7 Chaetognatha dropped        184
##  8 Chordata     Thaliacea      159
##  9 Chordata     dropped          2
## 10 Cnidaria     Hydrozoa        55
## 11 Cnidaria     dropped          7
## 12 Mollusca     Cephalopoda      1
## 13 Mollusca     Gastropoda     253

#CPR dominanted by Arthropod-specifically copepod in terms of abundance

#how many individuals were assigned to species level?
sum(CPR$Sum[CPR$Assignment=="Species"])

## [1] 28441

sum(CPR$Sum[CPR$Assignment=="Species"])/sum(CPR$Sum)

## [1] 0.5890601

#Drop to species level
CPRspecies<-droplevels(subset(CPR, Assignment=="Species"))
#what species were dominant?
CPRspecies %>% select(Phylum, Class, Family, Species, Sum) %>% group_by(Phylum, Class, Family, Species) %>% summarize_if(is.numeric, sum, na.rm=TRUE) %>% print(n=32) #what species were dominant

## # A tibble: 32 × 5
## # Groups:   Phylum, Class, Family [20]
##    Phylum       Class        Family            Species                      Sum
##    <chr>        <chr>        <chr>             <chr>                      <dbl>
##  1 Annelida     Polychaeta   Lopadorrhynchidae Pelagobia longicirrata        53
##  2 Annelida     Polychaeta   Tomopteridae      Tomopteris carpenteri          1
##  3 Arthropoda   Copepoda     Augaptilidae      Haloptilus oxycephalus        41
##  4 Arthropoda   Copepoda     Calanidae         Calanoides acutus           1049
##  5 Arthropoda   Copepoda     Calanidae         Calanus simillimus           608
##  6 Arthropoda   Copepoda     Calanidae         Neocalanus tonsus           2200
##  7 Arthropoda   Copepoda     Clausocalanidae   Clausocalanus brevipes      3062
##  8 Arthropoda   Copepoda     Clausocalanidae   Clausocalanus laticeps       399
##  9 Arthropoda   Copepoda     Euchaetidae       Paraeuchaeta exigua            1
## 10 Arthropoda   Copepoda     Metridinidae      Metridia lucens              754
## 11 Arthropoda   Copepoda     Metridinidae      Pleuromamma borealis          24
## 12 Arthropoda   Copepoda     Metridinidae      Pleuromamma piseki             6
## 13 Arthropoda   Copepoda     Metridinidae      Pleuromamma robusta            3
## 14 Arthropoda   Copepoda     Oithonidae        Oithona similis            18788
## 15 Arthropoda   Copepoda     Paracalanidae     Mecynocera clausi             20
## 16 Arthropoda   Copepoda     Rhincalanidae     Rhincalanus gigas            596
## 17 Arthropoda   Copepoda     Subeucalanidae    Subeucalanus longiceps         3
## 18 Arthropoda   Malacostraca Euphausiidae      Euphausia frigida              4
## 19 Arthropoda   Malacostraca Euphausiidae      Euphausia spinifera            4
## 20 Arthropoda   Malacostraca Euphausiidae      Euphausia triacantha           8
## 21 Arthropoda   Malacostraca Euphausiidae      Euphausia vallentini          26
## 22 Arthropoda   Malacostraca Euphausiidae      Nematoscelis megalops         11
## 23 Arthropoda   Malacostraca Euphausiidae      Thysanoessa gregaria          70
## 24 Arthropoda   Malacostraca Euphausiidae      Thysanoessa macrura          512
## 25 Arthropoda   Malacostraca Hyperiidae        Themisto gaudichaudii          8
## 26 Arthropoda   Malacostraca Phrosinidae       Primno macropa                 8
## 27 Chaetognatha Sagittoidea  Eukrohniidae      Eukrohnia hamata              32
## 28 Chaetognatha Sagittoidea  Sagittidae        Pseudosagitta gazellae         1
## 29 Chordata     Thaliacea    Salpidae          Salpa thompsoni              145
## 30 Cnidaria     Hydrozoa     Solmundaeginidae  Solmundella bitentaculata      2
## 31 Mollusca     Gastropoda   Clionidae         Clione limacina antarctica     1
## 32 Mollusca     Gastropoda   Pneumodermatidae  Spongiobranchaea australis     1

#arthopod again at species level, highest abundance and number of unique species

#what % of abundance was Arthropod
sum(CPRspecies$Sum[CPRspecies$Phylum=="Arthropoda"])/sum(CPRspecies$Sum)

## [1] 0.9917021

#number of individuals?
sum(CPRspecies$Sum[CPRspecies$Phylum=="Arthropoda"])

## [1] 28205

#oithona similis is dominant>by how much?
sum(CPRspecies$Sum[CPRspecies$Species=="Oithona similis"])/sum(CPRspecies$Sum)

## [1] 0.6605956

4 Tidy workspace

ls()

##  [1] "cent"           "contour.vals1"  "CPR"            "CPR_eDNAvals"  
##  [5] "CPR_eDNAvals2"  "CPRspecies"     "diversity"      "eDNAdata"      
##  [9] "eDNAdata_wide"  "eDNAmerged"     "eDNAsamples"    "eulerfill"     
## [13] "extract.xyz"    "fill12"         "fill13"         "hill"          
## [17] "hill0"          "hill1"          "hill1lm"        "hill2"         
## [21] "hillplot"       "hillsummstat"   "indic"          "Invasives"     
## [25] "jaccard"        "Lat5"           "legend"         "MetazoanFill"  
## [29] "methodfill"     "newsf"          "nMDS"           "p1"            
## [33] "PA_matrix"      "rawfill2"       "samples"        "samples_paired"
## [37] "scrs"           "segs"           "SeqDep1"        "SeqDepth"      
## [41] "spcs_merged"    "spcs_wide"      "species"        "species_long"  
## [45] "violin.plot"    "violin.plot2"   "wilcoxdat"      "zonefill"

#remove all besides eDNAdata, eDNAdata_wide, fill12, samples, CPR_eDNAvals, CPRspecies, species_long
rm("Amphinema","Arach", "Aster", "Austrominius", "Bougainvillia", "BugFlab","Clytia", "Coryne", "Cryp", "eDNAsamples", "eDNAdata", "eDNAshape", "exc_sha", "hill12", "hill2", "hills", "hillsplot", "hillsummstat", "Invasives", "InvasivesCol", "InvasivesComparison", "InvasiveSummary", "InvasivesVolume", "InvasivesVolumeZone", "Lizzia", "long_raw", "LongInvasives", "matchedsamples", "Metacarcinus", "mysamples", "Mytilus", "Obelia", "Ostrea", "Patiriella", "plothill_0", "plothill_1", "plothill_2", "raweDNA", "rawfill", "rawsummary", "Richness", "species", "species12", "species2", "SpeciesFreq", "speciesmerged", "standard_error", "ttestinv", "venn_plot", "Venn12", "Venn2", "vennfill", "violin.plot", "Water")

#CPR vs eDNA merged

5 CPR vs eDNA

5.1 Pre-match: Species beyond CPR limits (sea-ice)

#species detected past the sea-ice where CPR cannot be cast
seaiceeDNA<-droplevels(subset(species_long, Zone=="AAZ" | Zone=="SACCZ"))
seaiceeDNA<-droplevels(subset(seaiceeDNA, CPRSubset!="match"))
seaiceeDNA<-seaiceeDNA[!seaiceeDNA$Species %in% CPRspecies$Species,] 

length(unique(seaiceeDNA$Species))

## [1] 29

unique(seaiceeDNA$Species)

##  [1] "Ctenocalanus citer"              "Calanus propinquus"             
##  [3] "Obelia dichotoma"                "Calanus propinquus/similimus"   
##  [5] "Ostrea angasi"                   "Coryne eximia"                  
##  [7] "Bugulina flabellata"             "Stephos longipes"               
##  [9] "Asterias amurensis"              "Limacina retroversa"            
## [11] "Bougainvillia muscus"            "Cryptosula pallasiana"          
## [13] "Clione antarctica"               "Krefftichthys anderssoni"       
## [15] "Euphausia superba"               "Vibilia antarctica"             
## [17] "Watersipora subtorquata/subatra" "Arachnopusia unicornis"         
## [19] "Clytia gracilis"                 "Halichondria panicea"           
## [21] "Austrominius modestus"           "Lizzia blondina"                
## [23] "Metridia gerlachei"              "Amphinema dinema"               
## [25] "Polyeunoa laevis"                "Sterechinus neumayeri"          
## [27] "Harmothoe fuligineum"            "Parborlasia corrugatus"         
## [29] "Paralabidocera grandispina"

5.2 Format data

#sample data and paired samples
matchedsamples<-droplevels(subset(samples, CPRSubset=="match"))
eDNAsamples<-droplevels(subset(matchedsamples, Method=="eDNA"))

#subset eDNA data and remove empty rows
eDNAdata<-eDNAdata_wide %>% select(-Comment, -PercentID) %>% group_by(zOTU, Subkingdom, Phylum, Class, Order, Family, Genus, Species, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)
eDNAdata<-cbind.data.frame(eDNAdata[,1:9], eDNAdata[as.character(eDNAsamples$Sample)])
eDNAdata<-eDNAdata[rowSums(eDNAdata[,10:91])!=0,]

#12L and 2L
data12<-cbind.data.frame(eDNAdata[,1:9], eDNAdata[as.character(eDNAsamples$Sample[eDNAsamples$Type=="12L"])])
data12<-data12[rowSums(data12[,10:50])!=0,]
data2<-cbind.data.frame(eDNAdata[,1:9], eDNAdata[as.character(eDNAsamples$Sample[eDNAsamples$Type=="2L"])])
data2<-data2[rowSums(data2[,10:50])!=0,]

#add in sample data
eDNAlong<-eDNAdata %>% select(-zOTU) %>% group_by(Subkingdom, Phylum, Class, Order, Family, Genus, Species, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)
eDNAlong<-pivot_longer(eDNAlong, cols = starts_with("V"), names_to = "Sample", values_to = "Abundance")
eDNAlong<-eDNAlong[eDNAlong$Abundance!=0,]
eDNAlong<-merge(eDNAlong, eDNAsamples, all=TRUE) 

#subset CPR data and remove empty rows
CPR<-as.data.frame(cbind(CPR[,c(1:8)], CPR[as.character(matchedsamples$Sample[matchedsamples$Method=="CPR"])]))
CPR<-CPR[rowSums(CPR[,9:49])!=0,]

#rename order "Amphipoda (Hyperiidea)"
CPR$Order<-recode_factor(CPR$Order, "Amphipoda (Hyperiidea)"="Amphipoda")
CPR<-CPR %>% group_by(Subkingdom, Phylum, Class, Order, Family, Genus, Species, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)

#long format CPR and combine with sample data
CPRlong<-pivot_longer(CPR, cols = 9:49, names_to = "Sample", values_to = "Abundance")
CPRlong<-CPRlong[CPRlong$Abundance!=0,]
CPRlong<-merge(matchedsamples, CPRlong)

#merge all
LongData<-merge(eDNAlong, CPRlong, all=TRUE)

5.3 Diversity

5.3.1 Overview

#Zotu's 12L and 2L
length(unique(data12$zOTU))

## [1] 469

length(unique(data2$zOTU))

## [1] 151

#reads comparison
sum(eDNAlong$Abundance[eDNAlong$Type=="12L"])

## [1] 682136

sum(eDNAlong$Abundance[eDNAlong$Type=="12L"])/sum(eDNAlong$Abundance)

## [1] 0.9156434

sum(eDNAlong$Abundance[eDNAlong$Type=="2L"])

## [1] 62844

sum(eDNAlong$Abundance[eDNAlong$Type=="2L"])/sum(eDNAlong$Abundance)

## [1] 0.08435663

rm(eDNAdata_wide)

#abundance of CPR
sum(CPRlong$Abundance)

## [1] 5190

#Assignments: number of zotus assigned to taxonomic levels
table(data12$Assignment)

## 
##  Family   Genus   Order   Phyla Species 
##      46      40       6       2     375

(375/469)*100 #79.96% of zotus to species level

## [1] 79.95736

table(data2$Assignment)

## 
##   Class  Family   Genus   Order   Phyla Species 
##       1       9      16       2       4     119

(119/151)*100 #78.81% of zotus to species level

## [1] 78.80795

#number of individuals assigned to species level for CPR
sum(CPRlong$Abundance[CPRlong$Assignment=="Species"])/sum(CPRlong$Abundance)

## [1] 0.5884393

#Unique assignments
#remove zotu from eDNA first
data12<-data12 %>% select(-zOTU) %>% group_by(Subkingdom, Phylum, Class, Order, Family, Genus, Species, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)
data2<-data2 %>% select(-zOTU) %>% group_by(Subkingdom, Phylum, Class, Order, Family, Genus, Species, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)

table(data12$Assignment)

## 
##  Family   Genus   Order   Phyla Species 
##       7       8       6       2      35

table(data2$Assignment)

## 
##   Class  Family   Genus   Order   Phyla Species 
##       1       5       7       2       4      29

table(CPR$Assignment)

## 
##   Class  Family   Genus   Order  Phylum Species 
##       3       2      11       4       1      18

#Phyla and classes that were dominant (abundance)
CPRlong %>% select(Phylum, Class, Abundance) %>% group_by(Phylum, Class) %>% summarize_if(is.numeric, sum, na.rm=TRUE) #broad overview

## # A tibble: 10 × 3
## # Groups:   Phylum [6]
##    Phylum       Class        Abundance
##    <chr>        <chr>            <dbl>
##  1 Annelida     Polychaeta          12
##  2 Arthropoda   Copepoda          4994
##  3 Arthropoda   Decapoda             2
##  4 Arthropoda   Malacostraca        93
##  5 Arthropoda   Ostracoda           10
##  6 Chaetognatha Sagittoidea          9
##  7 Chaetognatha dropped             21
##  8 Chordata     Thaliacea            9
##  9 Cnidaria     Hydrozoa            13
## 10 Mollusca     Gastropoda          27

#CPR again dominanted by Arthropod-specifically copepod in terms of abundance
#reads for eDNA
eDNAlong %>% select(Type, Phylum, Class, Abundance) %>% group_by(Type, Phylum, Class) %>% summarize_if(is.numeric, sum, na.rm=TRUE) %>% print(n=40)

## # A tibble: 25 × 4
## # Groups:   Type, Phylum [19]
##    Type  Phylum        Class        Abundance
##    <chr> <chr>         <chr>            <dbl>
##  1 12L   Arthropoda    Branchiopoda         3
##  2 12L   Arthropoda    Copepoda        648225
##  3 12L   Arthropoda    Malacostraca      1246
##  4 12L   Bryozoa       Gymnolaemata       173
##  5 12L   Chaetognatha  Sagittoidea        272
##  6 12L   Chordata      Actinopteri      28438
##  7 12L   Cnidaria      Anthozoa            25
##  8 12L   Cnidaria      Hydrozoa           209
##  9 12L   Ctenophora    dropped              8
## 10 12L   Echinodermata Asteroidea           3
## 11 12L   Mollusca      Gastropoda        3526
## 12 12L   Porifera      Demospongiae         1
## 13 12L   Rotifera      Eurotatoria          7
## 14 2L    Annelida      Polychaeta          23
## 15 2L    Arthropoda    Copepoda         57347
## 16 2L    Arthropoda    Malacostraca       438
## 17 2L    Bryozoa       Gymnolaemata       439
## 18 2L    Chaetognatha  Sagittoidea        366
## 19 2L    Chordata      Actinopteri       2560
## 20 2L    Cnidaria      Hydrozoa           642
## 21 2L    Ctenophora    dropped             76
## 22 2L    Echinodermata Asteroidea           1
## 23 2L    Mollusca      Bivalvia             4
## 24 2L    Mollusca      Gastropoda         947
## 25 2L    Mollusca      dropped              1

#also dominated by copepod

5.3.2 Species comparison

5.3.2.1 Data formatting

length(unique(data12$Species[data12$Assignment=="Species"]))

## [1] 35

length(unique(data2$Species[data2$Assignment=="Species"]))

## [1] 29

length(unique(CPR$Species[CPR$Assignment=="Species"]))

## [1] 18

5.3.3 Shared and unique species

SpeciesSub<-droplevels(subset(LongData, Assignment=="Species"))
SpeciesSub<-SpeciesSub %>% select(Type, Phylum, Class, Order, Family, Genus, Species, Abundance) %>% group_by(Type, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)

#drop to species level
species12<-droplevels(subset(data12, Assignment=="Species")) %>% group_by(Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
species12$Abundance<-rowSums(species12[,7:47])
species2<-droplevels(subset(data2, Assignment=="Species")) %>% group_by(Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
species2$Abundance<-rowSums(species2[,7:47])
CPRspecies<-droplevels(subset(CPR, Assignment=="Species"))
CPRspecies$Abundance<-rowSums(CPRspecies[,9:49])
CPRspecies<-CPRspecies %>% group_by(Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)

#subset these for merging (eg get rid of samples)
CPRsubset<-CPRspecies %>% select(Phylum, Class, Order, Family, Genus, Species)
subset12<-species12 %>% select(Phylum, Class, Order, Family, Genus, Species)
subset2<-species2 %>% select(Phylum, Class, Order, Family, Genus, Species)
CPRsubset$Detection<-"Null"
subset12$Detection<-"Null"
subset2$Detection<-"Null"

#create exclusive shared data frame with those in common
Exc_Share<-merge(CPRsubset, subset12) %>% merge(subset2)
Exc_Share$Detection<-"Shared"

#add CPR exclusive
Exc_Share<-merge(Exc_Share, CPRsubset[!CPRsubset$Species %in% c(subset2$Species, subset12$Species),], all=TRUE) 
Exc_Share$Detection<-dplyr::recode_factor(Exc_Share$Detection, Null="CPR exclusive")

#12 only
Exc_Share<-merge(Exc_Share, subset12[!subset12$Species %in% c(subset2$Species, CPRsubset$Species),], all=TRUE) 
Exc_Share$Detection<-dplyr::recode_factor(Exc_Share$Detection, Null="LargeVF- eDNA")
#2 only
Exc_Share<-merge(Exc_Share, subset2[!subset2$Species %in% c(subset12$Species, CPRsubset$Species),], all=TRUE) 
Exc_Share$Detection<-dplyr::recode_factor(Exc_Share$Detection, Null="SmallVF- eDNA")
#CPR & 12
Exc_Share1<-list(CPRsubset, subset12) %>% reduce(semi_join, by="Species") %>% anti_join(.,subset2, by="Species")
Exc_Share1$Detection<-dplyr::recode_factor(Exc_Share1$Detection, Null="CPR & LargeVF-eDNA")
Exc_Share<-merge(Exc_Share, Exc_Share1, all=TRUE)
#CPR & 2
Exc_Share1<-list(CPRsubset, subset2) %>% reduce(semi_join, by="Species") %>% anti_join(.,subset12, by="Species")
Exc_Share1$Detection<-dplyr::recode_factor(Exc_Share1$Detection, Null="CPR & SmallVF-eDNA")
Exc_Share<-merge(Exc_Share, Exc_Share1, all=TRUE)
#eDNA only
Exc_Share1<-list(subset12, subset2) %>% reduce(semi_join, by="Species") %>% anti_join(.,CPRsubset, by="Species")
Exc_Share1$Detection<-dplyr::recode_factor(Exc_Share1$Detection, Null="eDNA exclusive")
#merge for the final time
Exc_Share<-merge(Exc_Share, Exc_Share1, all=TRUE)
Exc_Share$Detection<-factor(Exc_Share$Detection, levels = c("Shared", "eDNA exclusive","LargeVF- eDNA", "SmallVF- eDNA", "CPR exclusive", "CPR & SmallVF-eDNA","CPR & LargeVF-eDNA"))
Exc_Share$PA<-1
rm(Exc_Share1) #remove df

#table
with(Exc_Share, table(Phylum, Detection))

##                Detection
## Phylum          Shared eDNA exclusive LargeVF- eDNA SmallVF- eDNA CPR exclusive
##   Annelida           0              0             0             0             2
##   Arthropoda        10              5             4             0             3
##   Bryozoa            0              3             0             0             0
##   Chaetognatha       1              0             0             0             0
##   Chordata           0              2             3             0             1
##   Cnidaria           0              4             0             1             0
##   Echinodermata      0              1             0             0             0
##   Mollusca           0              1             0             1             0
##                Detection
## Phylum          CPR & SmallVF-eDNA CPR & LargeVF-eDNA
##   Annelida                       0                  0
##   Arthropoda                     0                  1
##   Bryozoa                        0                  0
##   Chaetognatha                   0                  0
##   Chordata                       0                  0
##   Cnidaria                       0                  0
##   Echinodermata                  0                  0
##   Mollusca                       0                  0

5.3.3.1 Bargraph

#ggplot
ggplot(data=Exc_Share, mapping = aes(fill=Phylum, y=PA, x=Detection))+ 
  geom_bar(stat="identity",width = 0.95) + scale_fill_manual(values = fill12) +
  theme(axis.line = element_line(colour = "black", size = 0.4, linetype = "solid"),
        axis.ticks = element_line(colour = "black", size = 0.4, linetype = "solid"), 
        axis.text.x = element_text(size=11,colour="black"),
        axis.text.y = element_text(size=18, vjust = 0.5, hjust=1,colour="black"),
        axis.title.y = element_text(size=15, vjust=3, face="bold"),
        axis.title.x = element_text(size=15,vjust = -2, face="bold"),
        legend.text = element_text(size =9.5), legend.title = element_text(size=13, face="bold"),
        plot.margin =  margin(20,20,30,20)) +
  theme(legend.key.height = unit(0.2, 'cm'), legend.key.width = unit(0.3, 'cm')) +
  scale_x_discrete(expand=c(0,0),labels=function(x) stringr::str_wrap(x, width = 5))+
  scale_y_continuous(expand = c(0, 0))+
  guides(fill=guide_legend(byrow = TRUE, title="Phylum"))+
  labs(x="Detection method", y="Number of unique species")

#ggsave(filename = "CPR_eDNA_excshare.jpeg", plot = last_plot(),  width = 22, height = 14, units = "cm", dpi = 600)

5.3.3.2 Euler plot

plot(euler(c("LargeVF" = 7, "SmallVF-eDNA"= 2, "CPR"= 6, "LargeVF&SmallVF-eDNA"=16, "LargeVF&CPR"=1, "SmallVF-eDNA&CPR"=0, "LargeVF&SmallVF-eDNA&CPR"= 11), shape = "ellipse"), 
     fill = eulerfill, alpha = 0.7, legend = TRUE, quantities = list(type = c("counts", "percent"), font=3, round=2, cex=0.8), labels=list(font=2, cex=1), c("#332288", "#44AA99", "#C76308"), alpha = 0.3, edges=list(lty = 0))

## Spatial trends ### Format data

#presence absence format: CPR
CPRPA<-CPRspecies[,7:47]
CPRPA[CPRPA>0]<-1
rownames(CPRPA)<-CPRspecies$Species

#presence absence format: LargeVF
PA12<-species12[,7:47]
PA12[PA12>0]<-1
rownames(PA12)<-species12$Species

#presence absence format: SmallVF-eDNA
PA2<-species2[,7:47]
PA2[PA2>0]<-1
rownames(PA2)<-species2$Species

5.3.4 Heatmap

#format data
heatmap<-merge(CPRlong, eDNAlong, all=TRUE)
heatmap<-droplevels(subset(heatmap, Assignment=="Species")) #spcs only
heatmap<-heatmap %>% select(Sample, Latitude, Type, Phylum, Class, Order, Family, Genus, Species, Abundance) %>% group_by(Sample, Latitude, Type, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
heatmap$FOO[heatmap$Abundance>0]<-1 #FOO column
heatmap$shortspcs<-spCodes(heatmap$Species, nchar.gen = 1, nchar.sp = 20,nchar.ssp = 20, sep.spcode = " ") #short names for plotting

## OK - no duplicated spcodes found.

heatmap$Lat<-format(round(as.numeric(heatmap$Latitude), 3), nsmall=3) #shorten latitude for plotting

#plot
ggplot(data=heatmap, mapping = aes(x=Lat, y=shortspcs, fill=FOO))+
  geom_tile(colour="grey")+xlab(label="Sample")+
  theme_bw()+
  scale_fill_gradient(low="grey", high="blue")+
  facet_grid(~Type, scales = "free_x", space = "free_x")+ 
  theme(legend.position="none",strip.text.x = element_text(size=16, face="bold"), 
        axis.title.x = element_text(vjust = -3, size=12, face="bold"), 
        panel.background = element_rect(fill="grey"),
        panel.grid.major = element_blank(), axis.ticks.x = element_line(linewidth=0.4),
        axis.title.y = element_text(size=12, face="bold", colour = "black"), 
        axis.text.x = element_text(angle = 90, size=6, colour="black"),
        axis.ticks.y = element_line(linewidth=0.4), plot.margin = margin(10,10,20,20), 
        axis.text.y = element_text(size=9, face="italic", colour="black"))+
  labs(x="Paired eDNA and CPR samples (Latitude)", y="Species detected")

#as table
with(heatmap, table(Species, Type, FOO))

## , , FOO = 1
## 
##                                  Type
## Species                           12L 2L CPR
##   Arachnopusia unicornis            2  2   0
##   Asterias amurensis                2  1   0
##   Bougainvillia muscus              4 19   0
##   Bugulina flabellata              10 20   0
##   Calanoides acutus                26 17  21
##   Calanus propinquus               22 15   0
##   Calanus propinquus/similimus     32 31   0
##   Calanus simillimus                0  0  20
##   Clausocalanus brevipes           13  4  37
##   Clausocalanus laticeps           31 19  23
##   Clausocalanus pergens             4  2   0
##   Corymorpha bigelowi               0  1   0
##   Coryne eximia                    27 31   0
##   Ctenocalanus citer               40 39   0
##   Electrona antarctica              2  0   0
##   Eukrohnia hamata                  5  6   6
##   Euphausia vallentini              6  5   2
##   Evadne nordmanni                  1  0   0
##   Gymnoscopelus bolini              1  0   0
##   Gymnoscopelus fraseri             8  1   0
##   Haloptilus oxycephalus            0  0   2
##   Krefftichthys anderssoni          3  2   0
##   Limacina retroversa              26 20   0
##   Lizzia blondina                   2  1   0
##   Lucicutia flavicornis             1  0   0
##   Mecynocera clausi                 0  0   1
##   Metridia gerlachei                1  0   0
##   Metridia lucens                  35 29  16
##   Neocalanus tonsus                19 11  11
##   Obelia dichotoma                  5  5   0
##   Oithona similis                  41 36  41
##   Ostrea angasi                     0  3   0
##   Paracalanus sp. B AC-2013         1  0   0
##   Pelagobia longicirrata            0  0   4
##   Platycephalus bassensis           1  0   0
##   Rhincalanus gigas                23 11  22
##   Salpa thompsoni                   0  0   3
##   Themisto gaudichaudii             2  0   1
##   Thysanoessa gregaria              2  2   7
##   Thysanoessa macrura               8  7  19
##   Tomopteris carpenteri             0  0   1
##   Vibilia antarctica                5  1   0
##   Watersipora subtorquata/subatra   1  1   0

#CPR had more occurences of C. brevipes and T. macrura

5.3.5 Richness across zones

#calculate hills numbers at q=0 (richness)
hill12<-cbind.data.frame(matchedsamples[matchedsamples$Type=="12L",], "hill0"=hill_div(PA12, qvalue = 0))
hill2<-cbind.data.frame(matchedsamples[matchedsamples$Type=="2L",], "hill0"=hill_div(PA2, qvalue = 0))
hillCPR<-cbind.data.frame(matchedsamples[matchedsamples$Type=="CPR",], "hill0"=hill_div(CPRPA, qvalue = 0))

#merge
hills<-merge(hill12, hill2, all=TRUE) %>% merge(hillCPR, all=TRUE)

#summary stats
hillsummstat<-summaryBy(hill0~Type*Zone,data=hills,FUN=function(x) 
  + {c(mean=mean(x),sum=sum(x),se=sd(x)/sqrt(length(x)))})
hillsummstat

##   Type Zone hill0.mean hill0.sum  hill0.se
## 1  12L  AAZ  11.750000       188 0.4873397
## 2  12L  PFZ   9.916667       119 0.4344682
## 3  12L  SAZ   8.076923       105 0.7378202
## 4   2L  AAZ  10.625000       170 0.5977388
## 5   2L  PFZ   7.833333        94 0.4578165
## 6   2L  SAZ   6.000000        78 0.6097498
## 7  CPR  AAZ   6.583333        79 0.6210735
## 8  CPR  PFZ   5.411765        92 0.6809316
## 9  CPR  SAZ   5.500000        66 0.3588703

#order zones
hillsummstat$Zone<-factor(hillsummstat$Zone, levels = c("SAZ", "PFZ", "AAZ"))
#rename methods for figure
hillsummstat$Type<-recode_factor(hillsummstat$Type, "12L"="LargeVF-eDNA", "2L"="SmallVF-eDNA")

#plot richness
ggplot(hillsummstat,aes(x=Zone,y=hill0.mean,fill=Type))+
  geom_bar(aes(x=Zone,y=hill0.mean), position = 'dodge',stat="identity") + 
  scale_fill_manual(values = CPR_eDNAvals2) +
  geom_errorbar(aes(x=Zone, ymin= hill0.mean-hill0.se, ymax= hill0.mean+hill0.se), position=position_dodge(0.9),width=0.4) +
  theme(axis.line = element_line(colour = "black", size = 0.2, linetype = "solid"),
        axis.ticks = element_line(colour = "black", size = 0.2, linetype = "solid"), 
        axis.text.x = element_text(size=18,vjust = 0.5, hjust=0.5, colour="black"),
        axis.text.y = element_text(size=18, vjust = 0.5, hjust=1,colour="black"),
        axis.title.y = element_text(face="bold", size=16, vjust=3),
        axis.title.x = element_text(face="bold",size=20,vjust = -2),
        legend.text = element_text(size = 14), legend.title = element_text(size=14, face="bold"), 
        plot.margin =  margin(20,10,20,20))+ 
  scale_y_continuous(expand = c(0, 0), limits = c(0,12.3))+ 
  scale_x_discrete(expand = c(0.25, 0))+
  guides(fill=guide_legend(title="Method"))+
  labs(x="SO zone", y="Mean species richness")

#ggsave(filename = "CPR_eDNA_meanrichness.jpeg", plot = last_plot(),  width = 18, height = 15, units = "cm", dpi = 600)

5.3.6 Diversity across zones

#select data and convert abundance to PA
species_long<-droplevels(subset(LongData, Assignment=="Species")) %>% group_by(Type, Zone, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
species_long$FOO[species_long$Abundance>0]<-1

#order zones
species_long$Zone<-factor(species_long$Zone, levels = c("SAZ", "PFZ", "AAZ"))

ggplot(data=species_long, aes(fill=Phylum, y=FOO, x=Type)) +
  geom_bar(stat="identity", width = 0.97) + 
  scale_fill_manual(values = fill12) +
  theme(axis.line = element_line(colour = "black", size = 0.4, linetype = "solid"),
        axis.ticks = element_line(colour = "black", size = 0.4, linetype = "solid"), 
        axis.text.x = element_text(size=10,vjust = 0.5, hjust=0.5, colour="black"),
        axis.text.y = element_text(size=16, vjust = 0.5, hjust=1,colour="black"),
        axis.title.y = element_text(face="bold", size=15, vjust=3),
        strip.text.x = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(face="bold", size=13,vjust = -2),
        legend.text = element_text(size = 9.5), legend.title = element_text(size=13, face="bold"),
        legend.key.height = unit(0.2, 'cm'), legend.key.width = unit(0.3, 'cm'),
        plot.margin =  margin(20,20,30,20)) + scale_y_continuous(expand = c(0, 0))+ 
  facet_grid(~Zone, switch = "x", scales = "free_x", space = "free_x")+ 
  theme(panel.spacing = unit(0.75, "cm"))+
  scale_x_discrete(labels=stringr::str_wrap(c("12L"="LargeVF", "2L"="SmallVF", "CPR"="CPR"),width=5), expand = c(0, 0))+
  guides(fill=guide_legend(byrow = TRUE, title="Phylum"))+
  labs(x="Detection methods across Southern Ocean zones", y="Species diversity")

#ggsave(filename = "CPR_eDNA_diversityzones.jpeg", plot = last_plot(),  width = 24, height = 14, units = "cm", dpi = 600)

#as table
with(species_long, table(Phylum, Zone, Type))

## , , Type = 12L
## 
##                Zone
## Phylum          SAZ PFZ AAZ
##   Annelida        0   0   0
##   Arthropoda     12  14  15
##   Bryozoa         1   2   2
##   Chaetognatha    1   1   1
##   Chordata        2   3   2
##   Cnidaria        4   3   3
##   Echinodermata   1   0   0
##   Mollusca        0   1   1
## 
## , , Type = 2L
## 
##                Zone
## Phylum          SAZ PFZ AAZ
##   Annelida        0   0   0
##   Arthropoda      9  11  12
##   Bryozoa         3   1   2
##   Chaetognatha    1   1   1
##   Chordata        1   0   1
##   Cnidaria        3   4   4
##   Echinodermata   0   0   1
##   Mollusca        1   2   1
## 
## , , Type = CPR
## 
##                Zone
## Phylum          SAZ PFZ AAZ
##   Annelida        0   1   2
##   Arthropoda      9  10   9
##   Bryozoa         0   0   0
##   Chaetognatha    1   1   1
##   Chordata        0   1   1
##   Cnidaria        0   0   0
##   Echinodermata   0   0   0
##   Mollusca        0   0   0

5.3.7 Indicator analyses

#12L
indic_12 <-  multipatt(t(PA12), matchedsamples$Zone[matchedsamples$Type=="12L"], func = "IndVal.g", duleg = FALSE, control = how(nperm=9999))
summary(indic_12, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)

## 
##  Multilevel pattern analysis
##  ---------------------------
## 
##  Association function: IndVal.g
##  Significance level (alpha): 0.05
## 
##  Total number of species: 35
##  Selected number of species: 10 
##  Number of species associated to 1 group: 5 
##  Number of species associated to 2 groups: 5 
## 
##  List of species associated to each combination: 
## 
##  Group AAZ  #sps.  1 
##                       A   B  stat p.value    
## Thysanoessa macrura 1.0 0.5 0.707  0.0006 ***
## 
##  Group SAZ  #sps.  4 
##                             A      B  stat p.value    
## Neocalanus tonsus      0.6957 1.0000 0.834  0.0001 ***
## Gymnoscopelus fraseri  0.8660 0.5385 0.683  0.0011 ** 
## Clausocalanus brevipes 0.6347 0.6154 0.625  0.0433 *  
## Clausocalanus pergens  1.0000 0.3077 0.555  0.0136 *  
## 
##  Group AAZ+PFZ  #sps.  5 
##                                   A      B  stat p.value    
## Calanoides acutus            1.0000 0.9286 0.964  0.0001 ***
## Limacina retroversa          1.0000 0.9286 0.964  0.0001 ***
## Calanus propinquus/similimus 0.8667 1.0000 0.931  0.0001 ***
## Rhincalanus gigas            1.0000 0.8214 0.906  0.0001 ***
## Calanus propinquus           1.0000 0.7857 0.886  0.0001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#2L
indic_2 <-  multipatt(t(PA2), matchedsamples$Zone[matchedsamples$Type=="2L"], func = "IndVal.g", duleg = FALSE, control = how(nperm=9999))
summary(indic_2, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)

## 
##  Multilevel pattern analysis
##  ---------------------------
## 
##  Association function: IndVal.g
##  Significance level (alpha): 0.05
## 
##  Total number of species: 29
##  Selected number of species: 10 
##  Number of species associated to 1 group: 5 
##  Number of species associated to 2 groups: 5 
## 
##  List of species associated to each combination: 
## 
##  Group AAZ  #sps.  3 
##                          A      B  stat p.value    
## Calanoides acutus   0.7778 0.8750 0.825  0.0001 ***
## Calanus propinquus  0.7500 0.7500 0.750  0.0006 ***
## Thysanoessa macrura 1.0000 0.4375 0.661  0.0016 ** 
## 
##  Group SAZ  #sps.  2 
##                             A      B  stat p.value    
## Neocalanus tonsus      0.8260 0.6923 0.756  0.0001 ***
## Clausocalanus brevipes 1.0000 0.3077 0.555  0.0120 *  
## 
##  Group AAZ+PFZ  #sps.  5 
##                                   A      B  stat p.value    
## Calanus propinquus/similimus 0.8966 1.0000 0.947  0.0001 ***
## Metridia lucens              0.8893 0.9286 0.909  0.0001 ***
## Limacina retroversa          1.0000 0.7143 0.845  0.0001 ***
## Clausocalanus laticeps       0.8871 0.6071 0.734  0.0143 *  
## Rhincalanus gigas            1.0000 0.3929 0.627  0.0153 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#CPR
indic_CPR <-  multipatt(t(CPRPA), matchedsamples$Zone[matchedsamples$Type=="CPR"], func = "IndVal.g", duleg = FALSE, control = how(nperm=9999))
summary(indic_CPR, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)

## 
##  Multilevel pattern analysis
##  ---------------------------
## 
##  Association function: IndVal.g
##  Significance level (alpha): 0.05
## 
##  Total number of species: 18
##  Selected number of species: 6 
##  Number of species associated to 1 group: 2 
##  Number of species associated to 2 groups: 4 
## 
##  List of species associated to each combination: 
## 
##  Group SAZ  #sps.  2 
##                           A      B  stat p.value    
## Neocalanus tonsus    1.0000 0.9167 0.957  0.0001 ***
## Thysanoessa gregaria 1.0000 0.5833 0.764  0.0001 ***
## 
##  Group AAZ+PFZ  #sps.  4 
##                          A      B  stat p.value    
## Calanus simillimus  1.0000 0.6897 0.830  0.0001 ***
## Rhincalanus gigas   0.9475 0.7241 0.828  0.0004 ***
## Thysanoessa macrura 1.0000 0.6552 0.809  0.0003 ***
## Calanoides acutus   0.9464 0.6897 0.808  0.0004 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

5.3.8 Format data for primer v7

#only need species assignment matrices
species12<-species12 %>% select(-Abundance)
species2<-species2 %>% select(-Abundance)
CPRspecies<-CPRspecies %>% select(-Abundance)
overallmatrix<-merge(species12, species2, all=TRUE) %>% merge(CPRspecies, all=TRUE)
#replace NAs with 0s
overallmatrix <- replace(overallmatrix, is.na(overallmatrix), 0)
#export
#write.xlsx(overallmatrix, "permanova_CPR.xlsx", sheetName = "PermanovaData", row.names = FALSE)
#add sample data
#write.xlsx(as.data.frame(matchedsamples), "permanova_CPR.xlsx", sheetName ="SampleData", append = TRUE, row.names = FALSE)