Southern Ocean: eDNA vs CPR
1 eDNA
1.1 Import functions and set working directory
setwd("/Users/16152366/Desktop/Antarctica/Results:Stats/2023/")
source("functions1.R")1.2 Raw eDNA data prep: contamination removal
#read in data
raweDNA<-read_excel("eDNA_CPR.xlsx", sheet = "Raw_eDNA")
samples<-read_excel("eDNA_CPR.xlsx", sheet = "Samples")
eDNAsamples<-droplevels(subset(samples, Method!="CPR"))
#assign subkingdoms
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Dinophyceae", "Myzozoa", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Chlorophyta", "Chlorophyta", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Bacillariophyta", "Bacillariophyta", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Ascomycota", "Dikarya", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Bacteroidetes", "Bacillariophyta", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Basidiomycota", "Dikarya", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Bigyra", "Harosa", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Bolidophyceae", "Harosa", raweDNA$Subkingdom)
raweDNA$Subkingdom<-if_else(raweDNA$Phylum=="Not assigned", "Not assigned", raweDNA$Subkingdom)
#contamination removal
contam_zotus<-read_excel("eDNA.xlsx", sheet = "zotu_remove") #zotus to completely remove
newzotus<-read_excel("eDNA.xlsx", sheet = "zotu_add") #zotus w/ number of reads present in controls removed from samples
newzotus<-newzotus %>% select(-`Amount in control`)
raweDNAcontam<-raweDNA[!raweDNA$zOTU %in% contam_zotus$zOTU,]
raweDNAcontam<-merge(raweDNAcontam, newzotus, all=TRUE)
##write.csv(raweDNAcontam, file="rawcontamremoved.csv", row.names = FALSE)1.3 Raw eDNA analysis
#read back in after adding comments, checking distribution
raweDNAcontam<-read_excel("eDNA_CPR.xlsx", sheet="Raw_eDNA_contamremoved")
#Sequencing statistics
sum(colSums(raweDNAcontam[12:157])) #2.43 million reads## [1] 2432820
length(unique(raweDNAcontam$zOTU)) #1273 zotus## [1] 1273
length(unique(raweDNAcontam$zOTU[raweDNAcontam$Subkingdom=="Metazoa"])) #808 metazoan ZOTU## [1] 808
sum(colSums(raweDNAcontam[12:157][raweDNAcontam$Subkingdom=="Metazoa",])) #1139972 metazoan reads## [1] 1139972
sum(colSums(raweDNAcontam[12:157][raweDNAcontam$Subkingdom=="Metazoa",]))/sum(colSums(raweDNAcontam[12:157])) #46.86 metazoan## [1] 0.4685805
length(unique(raweDNAcontam$zOTU[raweDNAcontam$Subkingdom=="Metazoa" & raweDNAcontam$Assignment=="Species"])) #619 resolved to species level## [1] 619
#take out only my samples
mysamples<-raweDNAcontam[as.character(samples$Sample[samples$Method=="eDNA"])]
raweDNA<-cbind(raweDNAcontam[1:11], mysamples)
#make long format and merge with sample data
long_raw<-pivot_longer(raweDNA, cols = 12:141, names_to = "Sample", values_to = "Reads")
long_raw<-long_raw[long_raw$Reads!=0,]
long_raw<-merge(eDNAsamples, long_raw, all=TRUE)
long_raw<-droplevels(subset(long_raw, Type!="eDNA_Control"))
#summary per method: data prep
rawsummary<- long_raw %>% group_by(Type, Subkingdom) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
rawsummary<-rawsummary[rawsummary$Reads!=0,]
rawsummary$Metazoan<-if_else(rawsummary$Subkingdom=="Metazoa", "Metazoan", "Non-metazoan")
##write.csv(rawsummary, "subkindgomsummary.csv")
#test the correlation between sequencing depth and ZOTU richness
SeqDepth<-long_raw %>% select(Type, Sample, zOTU, Reads)
write.csv(SeqDepth, "Seqdepth.csv")
SeqDep1<-read.csv("SeqDepth1.csv")
#12 L correlation
#plot correlation between read depth and zotu richness
options(scipen = 999)
plot(SeqDep1$Reads[SeqDep1$Type=="12L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="12L"], xlab="Sequencing depth (Reads)", ylab="ZOTU richness")cor(SeqDep1$Reads[SeqDep1$Type=="12L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="12L"], method='pearson')## [1] -0.03284266
# -0.03 correlation
cor.test(SeqDep1$Reads[SeqDep1$Type=="12L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="12L"], method='pearson')##
## Pearson's product-moment correlation
##
## data: SeqDep1$Reads[SeqDep1$Type == "12L"] and SeqDep1$ZOTU.Richness[SeqDep1$Type == "12L"]
## t = -0.26082, df = 63, p-value = 0.7951
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2745426 0.2127607
## sample estimates:
## cor
## -0.03284266
#not significant weak correlation
#2 L correlation
#plot correlation between read depth and zotu richness
plot(SeqDep1$Reads[SeqDep1$Type=="2L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="2L"], xlab="Sequencing depth (Reads)", ylab="ZOTU richness")cor(SeqDep1$Reads[SeqDep1$Type=="2L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="2L"], method='pearson')## [1] 0.1589604
# -0.03 correlation
cor.test(SeqDep1$Reads[SeqDep1$Type=="2L"],SeqDep1$ZOTU.Richness[SeqDep1$Type=="2L"], method='pearson')##
## Pearson's product-moment correlation
##
## data: SeqDep1$Reads[SeqDep1$Type == "2L"] and SeqDep1$ZOTU.Richness[SeqDep1$Type == "2L"]
## t = 1.278, df = 63, p-value = 0.206
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.08836466 0.38782345
## sample estimates:
## cor
## 0.1589604
#not significant weak correlation
#12L
#total reads
sum(rawsummary$Reads[rawsummary$Type=="12L"]) #1162458## [1] 1162458
#total metazoan %
(sum(rawsummary$Reads[rawsummary$Type=="12L" & rawsummary$Metazoan=="Metazoan"])/sum(rawsummary$Reads[rawsummary$Type=="12L"]))*100 #85.66## [1] 85.65565
(sum(rawsummary$Reads[rawsummary$Type=="12L" & rawsummary$Metazoan=="Non-metazoan"])/sum(rawsummary$Reads[rawsummary$Type=="12L"]))*100 ## [1] 14.34435
#2L
#total reads
sum(rawsummary$Reads[rawsummary$Type=="2L"]) #1268468## [1] 1268468
#total metazoan %
(sum(rawsummary$Reads[rawsummary$Type=="2L" & rawsummary$Metazoan=="Metazoan"])/sum(rawsummary$Reads[rawsummary$Type=="2L"]))*100 #11.37## [1] 11.37285
(sum(rawsummary$Reads[rawsummary$Type=="2L" & rawsummary$Metazoan=="Non-metazoan"])/sum(rawsummary$Reads[rawsummary$Type=="2L"]))*100 ## [1] 88.62715
ggplot(rawsummary, aes(fill=Subkingdom, y=Reads, x=Type)) +
geom_bar(stat="identity",width = 0.9) +
scale_fill_manual(values = fill13)+
theme(axis.line = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
axis.ticks = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
legend.text = element_text(size = 15), legend.title = element_text(face = "bold",size=16.5),
plot.margin = margin(20,10,10,10))+
theme(axis.text.x = element_text(colour="black",size=18,vjust = 0.5, hjust=0.5),
axis.text.y = element_text(colour="black",size=18, vjust = 0.5, hjust=1)) +
scale_y_continuous(expand = c(0, 0))+
theme(axis.title.y = element_text(size=16.5, face = "bold"),
axis.title.x = element_text(size=16.5,vjust = -0.5, face = "bold")) +
scale_x_discrete(labels=c("12L"="12 L", "2L"="2 L", "eDNA_Control"="Control"),expand = c(0.25, 0))+
guides(fill=guide_legend(title="Subkindgom"))+ xlab("Sample type")+ ylab("Number of reads")#Chlorophyta, Hacrobia and Harosa dominate 2L> what are the %s?
(sum(rawsummary$Reads[rawsummary$Type=="2L" & rawsummary$Subkingdom=="Chlorophyta"])/sum(rawsummary$Reads[rawsummary$Type=="2L"]))*100 #11.37## [1] 47.48705
(sum(rawsummary$Reads[rawsummary$Type=="2L" & rawsummary$Subkingdom=="Hacrobia"])/sum(rawsummary$Reads[rawsummary$Type=="2L"]))*100 #11.37## [1] 15.08812
(sum(rawsummary$Reads[rawsummary$Type=="2L" & rawsummary$Subkingdom=="Harosa"])/sum(rawsummary$Reads[rawsummary$Type=="2L"]))*100 #11.37## [1] 19.88391
#tidy environment
rm(raweDNAcontam, contam_zotus, newzotus)
# plot metazoan vs non-meta
long_raw$Metazoan<-if_else(long_raw$Subkingdom=="Metazoa", "Metazoan", "Non-metazoan")
long_raw$Metazoan<-if_else(long_raw$Subkingdom=="Not assigned", "Not assigned", long_raw$Metazoan)
long_raw$Metazoan<-factor(long_raw$Metazoan, levels=rev(sort(unique(long_raw$Metazoan))))
long_raw$Zone<-factor(long_raw$Zone, levels = c("STZ","SAZ","PFZ","AAZ","SACCZ"))
#Overall
ggplot(long_raw, aes(fill=Metazoan, y=Reads, x=Type)) + theme_classic()+
geom_bar(position="fill", stat="identity",width = 0.9) +
scale_fill_manual(values = rev(rawfill))+
theme(axis.line = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
axis.ticks = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
legend.text = element_text(size = 15), legend.title = element_text(face = "bold",size=16.5),
plot.margin = margin(20,10,10,10))+
theme(axis.text.x = element_text(colour="black",size=18,vjust = 0.5, hjust=0.5),
axis.text.y = element_text(colour="black",size=18, vjust = 0.5, hjust=1)) +
theme(axis.title.y = element_text(size=16.5, face = "bold"),
axis.title.x = element_text(size=16.5,vjust = -0.5, face = "bold")) +
scale_y_continuous(labels=scales::percent,expand = c(0, 0))+
scale_x_discrete(labels=c("12L"="LargeVF", "2L"="SmallVF"),expand = c(0.25, 0))+
guides(fill=guide_legend(title="Subkingdom"))+ xlab("Sample type")+ ylab("Number of reads (%)")ggplot(long_raw, aes(fill=Metazoan, y=Reads, x=Type)) + theme_classic()+
geom_bar(stat="identity",width = 0.9) +
scale_fill_manual(values = rev(rawfill))+
theme(axis.line = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
axis.ticks = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
legend.text = element_text(size = 15), legend.title = element_text(face = "bold",size=16.5),
plot.margin = margin(20,10,10,10))+
theme(axis.text.x = element_text(colour="black",size=18,vjust = 0.5, hjust=0.5),
axis.text.y = element_text(colour="black",size=18, vjust = 0.5, hjust=1)) +
theme(axis.title.y = element_text(size=16.5, face = "bold"),
axis.title.x = element_text(size=16.5,vjust = -0.5, face = "bold")) +
scale_y_continuous(expand = c(0, 0))+
scale_x_discrete(labels=c("12L"="LargeVF", "2L"="SmallVF"),expand = c(0.25, 0))+
guides(fill=guide_legend(title="Subkingdom"))+ xlab("Sample type")+ ylab("Number of reads")#per zone
ggplot(data=long_raw, aes(y=Reads, x=Type, fill=Metazoan))+
geom_bar(position="fill",stat="identity",width = 0.98) +
scale_fill_manual(values = MetazoanFill, labels=c("Not assigned"="Not assigned", "Non-metazoan"="Non-Animalia", "Metazoan"="Animalia")) +
theme(axis.line = element_line(colour = "black", linewidth = 0.3, linetype = "solid"),
axis.ticks = element_line(colour = "black", linewidth = 0.3, linetype = "solid"),
strip.text.x = element_text(size=16, face="bold"),
axis.text.x = element_text(colour="black",size=14,vjust = 0.5, hjust=0.5),
axis.text.y = element_text(colour="black",size=18, vjust = 0.5, hjust=1),
axis.title.x = element_text(vjust = -3, size=16, face="bold"),
axis.title.y = element_text(size=16, face="bold"),
plot.margin = margin(10,5,20,20), legend.text = element_text(size = 12),
legend.title = element_text(face = "bold", size=16))+
facet_grid(~Zone, scales = "free_x", space = "free_x")+
scale_x_discrete(labels=c("12L"="LargeVF", "2L"="SmallVF"),expand = c(0.25, 0))+
#scale_fill_discrete(labels=c("Not assigned"="Not assigned", "Non-metazoan"="Non-Animalia", "Metazoan"="Animalia"))+
scale_y_continuous(labels=c("0"="0", "0.25"="25", "0.50"="50","0.75"="75", "1.00"="100"),expand = c(0, 0))+
xlab("Volume")+ ylab("Relative read abundance (%)")#ggsave(filename = "Meta_nonmeta_zone.png", plot = last_plot(), width = 40, height = 16, units = "cm", dpi = 600)
?ggsave
#reads assigned to 12L (%)
sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$Metazoan=="Metazoan"])/sum(long_raw$Reads[long_raw$Type=="12L"])## [1] 0.8565565
#reads assigned to 2L (%)
sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$Metazoan=="Metazoan"])/sum(long_raw$Reads[long_raw$Type=="2L"]) ## [1] 0.1137285
#particularly low in SAZ, read %?
#12L
sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$Zone=="SAZ"]) #239846## [1] 239846
sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$Zone=="SAZ" & long_raw$Metazoan=="Metazoan"]) #204424 ## [1] 204424
sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$Zone=="SAZ" & long_raw$Metazoan=="Metazoan"])/sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$Zone=="SAZ"])## [1] 0.8523136
#2L
sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$Zone=="SAZ"]) #529515## [1] 529515
sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$Zone=="SAZ" & long_raw$Metazoan=="Metazoan"])/sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$Zone=="SAZ"]) #4383 (0.008%) metazoan## [1] 0.008277386
1.4 Metazoan data
#filter data to be metazoan only
eDNAdata<-droplevels(subset(long_raw, Subkingdom=="Metazoa", Assignment!="Exclude"))
eDNAdata_wide<-droplevels(subset(raweDNA, Subkingdom=="Metazoa", Assignment!="Exclude"))
sum(eDNAdata$Reads)## [1] 1139972
#1139972 total metazoan reads, these were assigned to:
sum(eDNAdata$Reads[eDNAdata$Assignment=="Species"]) #1119052 98.16%## [1] 1119052
sum(eDNAdata$Reads[eDNAdata$Assignment=="Genus"]) #4687## [1] 4635
sum(eDNAdata$Reads[eDNAdata$Assignment=="Family"]) #15584## [1] 15584
sum(eDNAdata$Reads[eDNAdata$Assignment=="Order"]) #429## [1] 429
sum(eDNAdata$Reads[eDNAdata$Assignment=="Class"]) #16## [1] 16
sum(eDNAdata$Reads[eDNAdata$Assignment=="Phyla"]) #256## [1] 256
#species assignments
species<-droplevels(subset(eDNAdata_wide, Assignment=="Species"))
species_long<-droplevels(subset(eDNAdata, Assignment=="Species"))
length(unique(species$Species)) #68 species detected## [1] 68
length(unique(species$Genus)) #from 54 genera## [1] 54
length(unique(species$Family)) #and 43 families## [1] 43
length(unique(species$Order)) #and 24 orders## [1] 24
1.5 t-test between volumes
#test normality
ttest<-eDNAdata %>% select(Sample, Type, Reads) %>% group_by(Sample, Type) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
tapply(ttest$Reads,ttest$Type,shapiro.test) #not normal## $`12L`
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.71037, p-value = 0.000000000504
##
##
## $`2L`
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.53665, p-value = 0.000000000000518
#conduct wilcoxon ttest
wilcox.test(ttest$Reads~ttest$Type, mu=0, alt="two.sided",
conf.level=0.95, paired=TRUE) #V = 2077, p-value = 9.8e-12##
## Wilcoxon signed rank test with continuity correction
##
## data: ttest$Reads by ttest$Type
## V = 2077, p-value = 0.00000000005342
## alternative hypothesis: true location shift is not equal to 0
tapply(ttest$Reads, ttest$Type, median) #11376 for 12, 696 for 2## 12L 2L
## 11376 696
#metazoan reads/composition per sample
tapply(ttest$Reads, ttest$Type, mean)## 12L 2L
## 15318.63 2219.40
# 12L 2L
# 15318.63 2219.40
#overall reads
overall<-long_raw %>% select(Sample, Type, Reads) %>% group_by(Sample, Type) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
tapply(overall$Reads, overall$Type, mean)## 12L 2L
## 17883.97 19514.89
rm(overall, ttest)
#per zone differences
eDNAdata$Type<-as.factor(eDNAdata$Type)
eDNAdata$Zone<-as.factor(eDNAdata$Zone)
wilcoxdat<-eDNAdata %>% select(Sample, Zone, Type, Reads) %>% group_by(Sample, Zone, Type) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
wilcoxdat %>% group_by(Zone) %>% wilcox_test(Reads~Type, paired=TRUE)## # A tibble: 5 × 8
## Zone .y. group1 group2 n1 n2 statistic p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 STZ Reads 12L 2L 6 6 21 0.0313
## 2 SAZ Reads 12L 2L 17 17 153 0.0000153
## 3 PFZ Reads 12L 2L 12 12 78 0.000488
## 4 AAZ Reads 12L 2L 24 24 272 0.000175
## 5 SACCZ Reads 12L 2L 6 6 21 0.0313
1.6 Venn diagram
#make a list of the species found in 12L and 2L samples
Venn12<-subset(species_long, Type=="12L")
Venn2<-subset(species_long, Type=="2L")
#create venn plot
venn_plot <- venn.diagram(list('12L eDNA' = unique(Venn12$Species), '2L eDNA' = unique(Venn2$Species)),
fill = alpha(vennfill,0.95), lty = 'blank',fontfamily ="sans",
fontface="bold",cex = 6,cat.dist = c(0.05, 0.05), cat.cex = 1,cat.fontface = "bold",
cat.default.pos = "outer", cat.col=vennfill,rotation.degree=300,cat.pos = c(310,130),
cat.fontfamily="sans",scaled=FALSE,filename = NULL,disable.logging = TRUE);
grid.newpage(); grid.draw(venn_plot)#proportional
plot(euler(c("LargeVF" = 14, "SmallVF"= 2, "LargeVF&SmallVF"= 51), shape = "ellipse"),
fill = vennfill, alpha = 0.6, legend = TRUE, quantities = list(type = c("counts"), font=3, round=2, cex=0.8), labels=list(font=2, cex=1), c("#332288", "#44AA99"), alpha = 0.3, edges=list(lty = 0))#will have to fix up in photoshop1.7 Barplot of exclusive species per volume
#establish exclusive species and shared per volume
Exclusive12<-Venn12[!Venn12$Species %in% Venn2$Species,] #exclusive 12
Exclusive12<-Exclusive12 %>% select(Phylum, Species, Reads)
Exclusive12$ID<-"12 L exclusive"
Exclusive12<- Exclusive12 %>% group_by(Phylum, Species, ID) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
Exclusive2<-Venn2[!Venn2$Species %in% Venn12$Species,] #exclusive 2
Exclusive2<-Exclusive2 %>% select(Phylum, Species, Reads)
Exclusive2$ID<-"2 L exclusive"
Exclusive2<- Exclusive2 %>% group_by(Phylum, Species, ID) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
Shared<-Venn12[Venn12$Species %in% Venn2$Species,] #exclusiveshared
Shared<-Shared %>% select(Phylum, Species, Reads)
Shared$ID<-"Shared"
Shared<- Shared %>% group_by(Phylum, Species, ID) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
#merge the together
exc_sha<-merge(Exclusive12, Exclusive2, all=TRUE) %>% merge(Shared, all=TRUE)
#make PA data
exc_sha$PA<-1
#barplot: no. species
#plot1<-
ggplot(exc_sha, aes(fill=Phylum, y=PA, x=ID)) +
geom_bar(stat="identity", width = 0.95) + scale_fill_manual(values = fill12) +
theme(axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
axis.ticks = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
legend.text = element_text(size = 10),legend.background = element_blank(),
# legend.box.background = element_rect(colour = "black"),legend.position = c(0.89,0.92),
legend.title = element_text(face="bold", size=13), legend.key.height = unit(0.3, 'cm'),
legend.key.width = unit(0.4, 'cm'),plot.margin = margin(50,80,20,20))+
theme(axis.text.x = element_text(colour="black",size=14,vjust = -1),
axis.text.y = element_text(colour="black",size=16, vjust = -0.5, hjust=1),
axis.title.y = element_text(face="bold",size=16, vjust = 3),
axis.title.x = element_text(face="bold",size=16,vjust = -2.5)) +
scale_y_continuous(expand = c(0, 0))+ scale_x_discrete(expand = c(0.05,0.05), labels=c("12 L exclusive"= "LargeVF", "2 L exclusive"= "SmallVF-eDNA"))+
guides(fill=guide_legend(title="Phylum", byrow=TRUE))+ xlab("Detection")+ ylab("Number of species")#cvdPlot(plot1) #check colour blind appropriate>looks good
#ggsave(filename = "eDNA_excshare.jpeg", plot = last_plot(), width = 20, height = 14, units = "cm", dpi = 600)
#no. species as percentage
ggplot(exc_sha, aes(fill=Phylum, y=PA, x=ID)) +
geom_bar(position = "fill", stat="identity", width = 0.95) + scale_fill_manual(values = fill12) +
theme(axis.line = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
axis.ticks = element_line(colour = "black", linewidth = 0.2, linetype = "solid"),
legend.text = element_text(size = 16), legend.spacing = unit(0.1,"mm"),legend.background = element_blank(),
# legend.box.background = element_rect(colour = "black"),legend.position = c(0.89,0.92),
legend.title = element_text(face="bold", size=20), plot.margin = margin(50,80,20,20))+
theme(axis.text.x = element_text(colour="black",size=18,vjust = -1),
axis.text.y = element_text(colour="black",size=22, vjust = 0.5, hjust=1),
axis.title.y = element_text(face="bold",size=22, vjust = 3),
axis.title.x = element_text(face="bold",size=22,vjust = -3)) +
scale_y_continuous(labels=c("0"="0", "0.25"="25", "0.50"="50","0.75"="75", "1.00"="100"),expand = c(0, 0))+
scale_x_discrete(expand = c(0.05,0.05))+
guides(fill=guide_legend(title="Phylum", byrow=TRUE))+ xlab("Detection")+ ylab("Unique species composition (%)")#reads assigned to LargeVF samples
sum(exc_sha$Reads[exc_sha$ID=="12 L exclusive"])/sum(eDNAdata$Reads[eDNAdata$Type=="12L" & eDNAdata$Assignment=="Species"])## [1] 0.0007965421
#tidy environment
rm(Exclusive12, Exclusive2, Shared)
#zotus assigned to species per volume
length(unique(eDNAdata$zOTU[eDNAdata$Type=="12L" & eDNAdata$Assignment=="Species"]))/length(unique(eDNAdata$zOTU[eDNAdata$Type=="12L"])) #78.07## [1] 0.7806452
length(unique(eDNAdata$zOTU[eDNAdata$Type=="2L" & eDNAdata$Assignment=="Species"]))/length(unique(eDNAdata$zOTU[eDNAdata$Type=="2L"])) #76.95## [1] 0.7695167
1.8 Richness comparisons
#select data needed
Richness<-species_long %>% select(Type, Sample, Species, Reads)
Richness<-Richness %>% group_by(Type, Sample, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
Richness$PA<-1
Richness<-Richness %>% select(Type, Sample, PA) %>% group_by(Type, Sample) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
#t test
tapply(Richness$PA, Richness$Type, hist) #histogram looks pretty good no big skew## $`12L`
## $breaks
## [1] 2 4 6 8 10 12 14 16 18 20 22 24
##
## $counts
## [1] 2 5 15 16 13 8 3 2 0 0 1
##
## $density
## [1] 0.015384615 0.038461538 0.115384615 0.123076923 0.100000000 0.061538462
## [7] 0.023076923 0.015384615 0.000000000 0.000000000 0.007692308
##
## $mids
## [1] 3 5 7 9 11 13 15 17 19 21 23
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
##
## $`2L`
## $breaks
## [1] 2 4 6 8 10 12 14 16 18
##
## $counts
## [1] 10 13 12 10 11 6 2 1
##
## $density
## [1] 0.076923077 0.100000000 0.092307692 0.076923077 0.084615385 0.046153846
## [7] 0.015384615 0.007692308
##
## $mids
## [1] 3 5 7 9 11 13 15 17
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
tapply(Richness$PA, Richness$Type, shapiro.test) #not completely normal but histogram looks ok## $`12L`
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.93593, p-value = 0.002246
##
##
## $`2L`
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.97003, p-value = 0.1159
t.test(Richness$PA~Richness$Type, alternative = "two.sided", conf=0.95, paired = T) #t-test##
## Paired t-test
##
## data: Richness$PA by Richness$Type
## t = 4.8254, df = 64, p-value = 0.000009024
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.9556194 2.3059190
## sample estimates:
## mean difference
## 1.630769
#mean and standard error
tapply(Richness$PA, Richness$Type, mean)## 12L 2L
## 10.03077 8.40000
standard_error <- function(x) sd(x) / sqrt(length(x)) #standard error function for mean
standard_error(Richness$PA[Richness$Type=="12L"]) #0.43## [1] 0.4412445
standard_error(Richness$PA[Richness$Type=="2L"]) #0.43## [1] 0.4367714
1.9 Detection frequency: species assignments
SpeciesFreq<-species_long %>% select(Type, Sample, Phylum, Class, Order, Family, Genus, Species, Reads) #select data
SpeciesFreq<-SpeciesFreq %>% group_by(Type, Sample, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE) #summarise
SpeciesFreq$FOO<-1 #add frequency of occurence column
SpeciesFreq<-SpeciesFreq %>% group_by(Type, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE) #summarise without sample to add all the species FOO together
SpeciesFreq## # A tibble: 119 × 9
## # Groups: Type, Phylum, Class, Order, Family, Genus [96]
## Type Phylum Class Order Family Genus Species Reads FOO
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 12L Annelida Polychaeta Phyllodocida Polyno… Harm… Harmot… 14054 1
## 2 12L Annelida Polychaeta Phyllodocida Polyno… Poly… Polyeu… 25286 2
## 3 12L Arthropoda Branchiopoda Diplostraca Podoni… Evad… Evadne… 291 4
## 4 12L Arthropoda Branchiopoda Diplostraca Podoni… Podon Podon … 44 2
## 5 12L Arthropoda Copepoda Calanoida Acarti… Para… Parala… 255 1
## 6 12L Arthropoda Copepoda Calanoida Calani… Cala… Calano… 2720 35
## 7 12L Arthropoda Copepoda Calanoida Calani… Cala… Calanu… 2226 3
## 8 12L Arthropoda Copepoda Calanoida Calani… Cala… Calanu… 25349 30
## 9 12L Arthropoda Copepoda Calanoida Calani… Cala… Calanu… 232205 39
## 10 12L Arthropoda Copepoda Calanoida Calani… Cten… Ctenoc… 322253 52
## # ℹ 109 more rows
sum(SpeciesFreq$Reads[SpeciesFreq$Type=="2L" & SpeciesFreq$Species=="Ctenocalanus citer"])/sum(SpeciesFreq$Reads[SpeciesFreq$Type=="2L"])## [1] 0.4241434
sum(SpeciesFreq$Reads[SpeciesFreq$Type=="12L" & SpeciesFreq$Species=="Ctenocalanus citer"])/sum(SpeciesFreq$Reads[SpeciesFreq$Type=="12L"])## [1] 0.3286659
#plot FOO and reads in grid form
species_freqplot<-
plot_grid(nrow = 1, ncol = 3, labels = c("A", "B", "C"),label_size = 25,
ggplot(SpeciesFreq, aes(fill=Phylum, y=FOO, x=Type)) +
geom_bar(stat="identity", width = 0.95) + scale_fill_manual(values = fill12) +
theme(axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
axis.ticks = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
legend.position="none", plot.margin = margin(50,80,20,20))+
theme(axis.text.x = element_text(colour="black",size=14,vjust = -1),
axis.text.y = element_text(colour="black",size=16, vjust = -0.5, hjust=1),
axis.title.y = element_text(face="bold",size=16, vjust = 3),
axis.title.x = element_text(face="bold",size=16,vjust = -2.5)) +
scale_y_continuous(expand = c(0, 0))+ scale_x_discrete(expand = c(0.05,0.05), labels=c("12L"= "LargeVF", "2L"= "SmallVF"))+
guides(fill=guide_legend(title="Phylum", byrow=TRUE))+ xlab("Detection")+ ylab("Species detection frequency (FOO)")
,
ggplot(SpeciesFreq, aes(fill=Phylum, y=Reads, x=Type)) +
geom_bar(stat="identity", width = 0.95) + scale_fill_manual(values = fill12) +
theme(axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
axis.ticks = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
legend.position="none", plot.margin = margin(50,80,20,20))+
theme(axis.text.x = element_text(colour="black",size=14,vjust = -1),
axis.text.y = element_text(colour="black",size=16, vjust = -0.5, hjust=1),
axis.title.y = element_text(face="bold",size=16, vjust = 3),
axis.title.x = element_text(face="bold",size=16,vjust = -2.5)) +
scale_y_continuous(expand = c(0, 0))+ scale_x_discrete(expand = c(0.05,0.05), labels=c("12L"= "LargeVF", "2L"= "SmallVF"))+
guides(fill=guide_legend(title="Phylum", byrow=TRUE))+ xlab("Detection")+ ylab("Reads")
,
ggplot(SpeciesFreq, aes(fill=Phylum, y=Reads, x=Type)) +
geom_bar(position = "fill", stat="identity", width = 0.95) + scale_fill_manual(values = fill12) +
theme(axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
axis.ticks = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
legend.position="none", plot.margin = margin(50,80,20,20))+
theme(axis.text.x = element_text(colour="black",size=14,vjust = -1),
axis.text.y = element_text(colour="black",size=16, vjust = -0.5, hjust=1),
axis.title.y = element_text(face="bold",size=16, vjust = 3),
axis.title.x = element_text(face="bold",size=16,vjust = -2.5)) +
scale_y_continuous(labels=c("0"="0", "0.25"="25", "0.50"="50","0.75"="75", "1.00"="100"), expand = c(0, 0))+ scale_x_discrete(expand = c(0.05,0.05), labels=c("12L"= "LargeVF", "2L"= "SmallVF"))+
guides(fill=guide_legend(title="Phylum", byrow=TRUE))+ xlab("Detection")+ ylab("Read abundance (%)"))
#take the legend from one plot (code above just pasted here, need legend for plot_grid)
legend<-ggplot(SpeciesFreq, aes(fill=Phylum, y=Reads, x=Type)) +
geom_bar(position = "fill", stat="identity", width = 0.95) + scale_fill_manual(values = fill12)
legend<-get_legend(legend)
#plot heatmap and legend together
plot_grid(species_freqplot, legend, rel_widths = c(3, .3))rm(legend)
#export data
##write.csv(SpeciesFreq, "SpeciesFrequency.csv")1.10 Diversity across zones
#select data and convert abundance to PA
diversity<-species_long %>% select(-Comment, -PercentID, -zOTU, -Metazoan) %>% group_by(Type, Zone, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
diversity$FOO[diversity$Reads>0]<-1
#order zones
diversity$Zone<-factor(diversity$Zone, levels = c("STZ", "SAZ", "PFZ", "AAZ", "SACCZ"))
ggplot(data=diversity, aes(fill=Phylum, y=FOO, x=Type)) +
geom_bar(stat="identity", width = 0.97) +
scale_fill_manual(values = fill12) +
theme(axis.line = element_line(colour = "black", size = 0.4, linetype = "solid"),
axis.ticks = element_line(colour = "black", size = 0.4, linetype = "solid"),
axis.text.x = element_text(size=11,vjust = 0.5, hjust=0.5, colour="black"),
axis.text.y = element_text(size=16, vjust = 0.5, hjust=1,colour="black"),
axis.title.y = element_text(face="bold", size=15, vjust=3),
strip.text.x = element_text(size = 12, face = "bold"),
axis.title.x = element_text(face="bold", size=13,vjust = -2),
legend.text = element_text(size = 10), legend.title = element_text(size=13, face="bold"),
legend.key.height = unit(0.2, 'cm'), legend.key.width = unit(0.3, 'cm'),
plot.margin = margin(20,20,30,20)) + scale_y_continuous(expand = c(0, 0))+
facet_grid(~Zone, switch = "x", scales = "free_x", space = "free_x")+
theme(panel.spacing = unit(0.65, "cm"))+
scale_x_discrete(labels=stringr::str_wrap(c("12L"="LargeVF", "2L"="SmallVF"),width=5), expand = c(0, 0))+
guides(fill=guide_legend(byrow = TRUE, title="Phylum"))+
labs(x="Detection methods across Southern Ocean zones", y="Species diversity")#ggsave(filename = "CPR_eDNA_diversityzones.jpeg", plot = last_plot(), width = 24, height = 14, units = "cm", dpi = 600)
#as table
with(species_long, table(Phylum, Zone, Type))## , , Type = 12L
##
## Zone
## Phylum STZ SAZ PFZ AAZ SACCZ
## Annelida 0 0 0 0 8
## Arthropoda 483 1450 298 665 53
## Bryozoa 12 9 3 20 4
## Chaetognatha 0 3 1 3 0
## Chordata 2 10 3 6 0
## Cnidaria 39 27 11 50 6
## Echinodermata 7 4 0 4 3
## Mollusca 4 3 12 23 0
## Nemertea 0 0 0 0 0
## Porifera 0 0 0 0 0
##
## , , Type = 2L
##
## Zone
## Phylum STZ SAZ PFZ AAZ SACCZ
## Annelida 0 0 0 0 4
## Arthropoda 61 146 139 398 24
## Bryozoa 16 23 3 29 11
## Chaetognatha 0 1 2 3 0
## Chordata 1 1 0 2 0
## Cnidaria 35 29 20 98 17
## Echinodermata 6 0 0 1 1
## Mollusca 2 2 8 25 1
## Nemertea 0 0 0 0 1
## Porifera 2 0 0 1 0
1.11 Hills numbers
1.11.1 Plot
#merge species assignments by species (i.e unique)
speciesmerged<-species %>% select(-zOTU, -Comment, -PercentID, -Assignment)
speciesmerged<-speciesmerged %>% group_by(Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
#12L only
species12<-cbind(speciesmerged[1:6], speciesmerged[as.character(samples$Sample[samples$Type=="12L"])])
species12<-species12[rowSums(species12[,7:71])!=0,]
#2L only
species2<-cbind(speciesmerged[1:6], speciesmerged[as.character(samples$Sample[samples$Type=="2L"])])
species2<-species2[rowSums(species2[,7:71])!=0,]
#eDNA 12L hills data
hill_0 <- hill_div(species12[7:71], qvalue = 0)
hill_1 <- hill_div(species12[7:71], qvalue = 1)
hill_2 <- hill_div(species12[7:71], qvalue = 2)
hill12 <- cbind("Sample"=names(hill_0), "Type"="12L", hill_0, hill_1, hill_2) #keep names to check the samples are correct
hill12<-merge(samples[samples$Type=="12L",], hill12, all=TRUE)
hill12 <- hill12 %>% mutate_at(c('hill_0', 'hill_1', 'hill_2'), as.numeric)
#2L hills data
hill_0 <- hill_div(species2[7:71], qvalue = 0)
hill_1 <- hill_div(species2[7:71], qvalue = 1)
hill_2 <- hill_div(species2[7:71], qvalue = 2)
hill2 <- cbind("Sample"=names(hill_0),"Type"="2L", hill_0, hill_1, hill_2) #keep names to check the samples are correct
hill2<-merge(samples[samples$Type=="2L",], hill2, all=TRUE)
hill2 <- hill2 %>% mutate_at(c('hill_0', 'hill_1', 'hill_2'), as.numeric)
#merge together
hills<-merge(hill12, hill2, all=TRUE)
hills$Zone<-factor(hills$Zone, levels=c("STZ", "SAZ", "PFZ","AAZ", "SACCZ"))
#seperate plots for each hills number
plothill_0 <- violin.plot(hills, ydata = hills$hill_0, col = vennfill, fill = vennfill, ylab = "Hills number (q = 0)", ylow = 0, yhigh = 26)+ theme_light()+
theme(plot.margin = unit(c(1,1,1,1), "cm"), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
theme(axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")
plothill_1 <- violin.plot(hills, ydata = hills$hill_1, col = vennfill, fill = vennfill, ylab = "Hills number (q = 1)", ylow = 0, yhigh = 6)+ theme_light()+
theme(plot.margin = unit(c(1,1,1,1), "cm"), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
theme(axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")
plothill_2 <- violin.plot(hills, ydata = hills$hill_2, col = vennfill, fill = vennfill, ylab = "Hills number (q = 2)", ylow = 0, yhigh = 6)+ theme_light()+
theme(plot.margin = unit(c(1,1,1,1), "cm"), axis.text.x = element_blank(), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
theme(axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")
#combine into one plot and...plot
hillsplot<-plot_grid(plothill_0, plothill_1, plothill_2, ncol=1, labels = c("A", "B", "C"), label_size = 25)
plot(hillsplot)#tidy env
rm(hill_0, hill_1, hill_2)1.11.2 Summary stats & t-tests
#select hills numbers
hillsummstat<-hills %>% select(Type, Zone, hill_0, hill_1, hill_2)
#summary stats function
hillsummstat<-summaryBy(hillsummstat[,3:5]~Zone & Type,data=hillsummstat,FUN=function(x)
+ {c(mean=mean(x),sum=sum(x),se=sd(x)/sqrt(length(x)))})
#pivot table longer
hillsummstat<-hillsummstat %>% pivot_longer(!c(Zone, Type), names_to = c("hill",".value"), names_pattern = "(\\d*).([A-Za-z]+)", names_prefix = "hill_")
print(hillsummstat, n=30)## # A tibble: 30 × 6
## Zone Type hill mean sum se
## <fct> <chr> <chr> <dbl> <dbl> <dbl>
## 1 STZ 12L 0 12.7 76 2.79
## 2 STZ 12L 1 2.51 15.1 0.600
## 3 STZ 12L 2 1.89 11.3 0.371
## 4 STZ 2L 0 8.17 49 2.36
## 5 STZ 2L 1 2.62 15.7 0.665
## 6 STZ 2L 2 1.89 11.4 0.378
## 7 SAZ 12L 0 8 136 0.569
## 8 SAZ 12L 1 2.46 41.8 0.294
## 9 SAZ 12L 2 2.01 34.2 0.219
## 10 SAZ 2L 0 5.76 98 0.511
## 11 SAZ 2L 1 2.95 50.2 0.347
## 12 SAZ 2L 2 2.34 39.8 0.278
## 13 PFZ 12L 0 9.92 119 0.434
## 14 PFZ 12L 1 2.81 33.7 0.216
## 15 PFZ 12L 2 2.26 27.1 0.177
## 16 PFZ 2L 0 7.83 94 0.458
## 17 PFZ 2L 1 2.65 31.8 0.314
## 18 PFZ 2L 2 2.09 25.1 0.279
## 19 AAZ 12L 0 11.9 285 0.539
## 20 AAZ 12L 1 2.81 67.4 0.221
## 21 AAZ 12L 2 2.05 49.2 0.164
## 22 AAZ 2L 0 11 264 0.571
## 23 AAZ 2L 1 2.87 68.9 0.252
## 24 AAZ 2L 2 2.20 52.8 0.202
## 25 SACCZ 12L 0 6 36 0.632
## 26 SACCZ 12L 1 1.43 8.58 0.171
## 27 SACCZ 12L 2 1.25 7.50 0.139
## 28 SACCZ 2L 0 6.83 41 1.28
## 29 SACCZ 2L 1 2.54 15.2 0.325
## 30 SACCZ 2L 2 2.05 12.3 0.286
1.11.3 Richness (q=0)
#test for normality
#2L
tapply(hill2$hill_0, hill2$Zone, shapiro.test) #not normal in SACCZ ## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.92594, p-value = 0.07914
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.936, p-value = 0.4481
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.73312, p-value = 0.01355
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.97841, p-value = 0.9413
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.7975, p-value = 0.0558
tapply(hill2$hill_0[hill2$Zone=="SACCZ"], hill2$Zone[hill2$Zone=="SACCZ"], hist) #hist bimodal so will not become normal## $SACCZ
## $breaks
## [1] 4 6 8 10
##
## $counts
## [1] 3 0 3
##
## $density
## [1] 0.25 0.00 0.25
##
## $mids
## [1] 5 7 9
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
#12L
tapply(hill12$hill_0, hill12$Zone, shapiro.test) #Not normal in saccz## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.9799, p-value = 0.894
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.90053, p-value = 0.1611
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.72072, p-value = 0.01014
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.9633, p-value = 0.694
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.8616, p-value = 0.1947
tapply(hill12$hill_0[hill12$Zone=="SACCZ"], hill12$Zone[hill12$Zone=="SACCZ"], hist) #skew but will do wilcox rather than transform and interpret## $SACCZ
## $breaks
## [1] 3 4 5 6 7
##
## $counts
## [1] 1 0 2 3
##
## $density
## [1] 0.1666667 0.0000000 0.3333333 0.5000000
##
## $mids
## [1] 3.5 4.5 5.5 6.5
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
hills %>% group_by(Zone) %>% t_test(hill_0~Type, paired=TRUE) #default conf 0.95 and two sided## # A tibble: 5 × 9
## Zone .y. group1 group2 n1 n2 statistic df p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 STZ hill_0 12L 2L 6 6 3.25 5 0.0227
## 2 SAZ hill_0 12L 2L 17 17 3.41 16 0.0036
## 3 PFZ hill_0 12L 2L 12 12 6.20 11 0.0000674
## 4 AAZ hill_0 12L 2L 24 24 1.68 23 0.107
## 5 SACCZ hill_0 12L 2L 6 6 -0.881 5 0.419
hills %>% group_by(Zone) %>% wilcox_test(hill_0~Type, paired=TRUE)## # A tibble: 5 × 8
## Zone .y. group1 group2 n1 n2 statistic p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 STZ hill_0 12L 2L 6 6 21 0.0355
## 2 SAZ hill_0 12L 2L 17 17 94 0.00932
## 3 PFZ hill_0 12L 2L 12 12 66 0.00355
## 4 AAZ hill_0 12L 2L 24 24 115 0.203
## 5 SACCZ hill_0 12L 2L 6 6 6 0.395
#significantly differs in the PFZ, SAZ, and STZ1.11.4 Shannons diversity (q=1)
#test for normality
#2L
tapply(hill2$hill_1, hill2$Zone, shapiro.test) #not normal in STZ or PFZ## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.95572, p-value = 0.3586
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.77634, p-value = 0.005106
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.89128, p-value = 0.325
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.92127, p-value = 0.1552
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.77093, p-value = 0.03168
tapply(hill2$hill_1[hill2$Zone=="STZ"], hill2$Zone[hill2$Zone=="STZ"], hist) #bimodal so will not become normal## $STZ
## $breaks
## [1] 1 2 3 4 5 6
##
## $counts
## [1] 4 0 0 1 1
##
## $density
## [1] 0.6666667 0.0000000 0.0000000 0.1666667 0.1666667
##
## $mids
## [1] 1.5 2.5 3.5 4.5 5.5
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
tapply(hill2$hill_1[hill2$Zone=="PFZ"], hill2$Zone[hill2$Zone=="PFZ"], hist) #also fairly blocked## $PFZ
## $breaks
## [1] 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
##
## $counts
## [1] 3 4 3 0 0 0 2
##
## $density
## [1] 0.5000000 0.6666667 0.5000000 0.0000000 0.0000000 0.0000000 0.3333333
##
## $mids
## [1] 1.75 2.25 2.75 3.25 3.75 4.25 4.75
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
#12L
tapply(hill12$hill_1, hill12$Zone, shapiro.test) #not normal in AAZ## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.90625, p-value = 0.02927
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.95267, p-value = 0.6762
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.87997, p-value = 0.2689
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.89411, p-value = 0.05419
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.87167, p-value = 0.233
tapply(hill12$hill_1[hill12$Zone=="AAZ"], hill12$Zone[hill12$Zone=="AAZ"], hist) #will conduct wilcox for not normal zones rather than transform and interpret## $AAZ
## $breaks
## [1] 1 2 3 4 5 6
##
## $counts
## [1] 3 14 3 2 2
##
## $density
## [1] 0.12500000 0.58333333 0.12500000 0.08333333 0.08333333
##
## $mids
## [1] 1.5 2.5 3.5 4.5 5.5
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
hills %>% group_by(Zone) %>% t_test(hill_1~Type, paired=TRUE) #default conf 0.95 and two sided## # A tibble: 5 × 9
## Zone .y. group1 group2 n1 n2 statistic df p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 STZ hill_1 12L 2L 6 6 -0.718 5 0.505
## 2 SAZ hill_1 12L 2L 17 17 -1.57 16 0.137
## 3 PFZ hill_1 12L 2L 12 12 0.445 11 0.665
## 4 AAZ hill_1 12L 2L 24 24 -0.195 23 0.847
## 5 SACCZ hill_1 12L 2L 6 6 -2.33 5 0.0676
hills %>% group_by(Zone) %>% wilcox_test(hill_1~Type, paired=TRUE)## # A tibble: 5 × 8
## Zone .y. group1 group2 n1 n2 statistic p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 STZ hill_1 12L 2L 6 6 6 0.438
## 2 SAZ hill_1 12L 2L 17 17 40 0.0887
## 3 PFZ hill_1 12L 2L 12 12 45 0.677
## 4 AAZ hill_1 12L 2L 24 24 154 0.922
## 5 SACCZ hill_1 12L 2L 6 6 3 0.156
#no significant differences1.11.5 Simspons diversity (q=2)
#test for normality
#2L
tapply(hill2$hill_2, hill2$Zone, shapiro.test) #only normal in SACCZ conduct wilcox## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.89926, p-value = 0.02076
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.75954, p-value = 0.003368
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.95563, p-value = 0.7855
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.89048, p-value = 0.04718
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.76202, p-value = 0.02605
#12L
tapply(hill12$hill_2, hill12$Zone, shapiro.test) #will conduct wilcox as above aren't normal## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.8823, p-value = 0.009249
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.91429, p-value = 0.242
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.7618, p-value = 0.02593
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.87718, p-value = 0.02863
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.87408, p-value = 0.243
hills %>% group_by(Zone) %>% t_test(hill_2~Type, paired=TRUE) #default conf 0.95 and two sided## # A tibble: 5 × 9
## Zone .y. group1 group2 n1 n2 statistic df p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 STZ hill_2 12L 2L 6 6 -0.00345 5 0.997
## 2 SAZ hill_2 12L 2L 17 17 -1.30 16 0.211
## 3 PFZ hill_2 12L 2L 12 12 0.523 11 0.612
## 4 AAZ hill_2 12L 2L 24 24 -0.588 23 0.562
## 5 SACCZ hill_2 12L 2L 6 6 -1.97 5 0.106
hills %>% group_by(Zone) %>% wilcox_test(hill_2~Type, paired=TRUE)## # A tibble: 5 × 8
## Zone .y. group1 group2 n1 n2 statistic p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 STZ hill_2 12L 2L 6 6 10 1
## 2 SAZ hill_2 12L 2L 17 17 43 0.12
## 3 PFZ hill_2 12L 2L 12 12 46 0.622
## 4 AAZ hill_2 12L 2L 24 24 132 0.623
## 5 SACCZ hill_2 12L 2L 6 6 3 0.156
#no significant differences1.12 Indicator species analysis
1.12.1 12L
#12L PA
indic_12L<-t(species12[7:71])
indic_12L[indic_12L>0]<-1 #convert to PA
colnames(indic_12L)<-species12$Species
#indic_12L<-multipatt(indic_12L, samples$Zone[samples$Type=="12L"], func = "IndVal.g",duleg = FALSE, control = how(nperm=9999))
#summary(indic_12L, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)1.12.2 2L
#PA
indic_2L<-t(species2[7:71])
indic_2L[indic_2L>0]<-1 #convert to PA
colnames(indic_2L)<-species2$Species
#indic_2L<-multipatt(indic_2L, samples$Zone[samples$Type=="2L"], func = "IndVal.g",duleg = FALSE, control = how(nperm=9999))
#summary(indic_2L, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)1.13 Potential invasive/pest identification
1.13.1 Plot
#select species identified as invasive, pest, or biofouling
Invasives<-species %>% filter(str_detect(Comment, c("^I")))
#shorten lat
samples$Lat<-format(round(as.numeric(samples$Latitude), 3), nsmall=3)
#samples$Sample1<-gsub('V1_', '', samples$Sample) #remove V1 from sample
samples$Lat<-paste(samples$Sample, samples$Lat, sep="_")
#summarise data
Invasives<-Invasives %>% select(-zOTU, -PercentID, -Assignment, -Comment) %>% group_by(Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
#assign status
Invasives$Status<-if_else(Invasives$Genus=="Amphinema" | Invasives$Genus=="Lizzia" | Invasives$Genus=="Austrominius" | Invasives$Genus=="Coryne" | Invasives$Genus=="Clytia", "Unclear", "NA")
Invasives$Status<-if_else(Invasives$Genus=="Asterias" | Invasives$Genus=="Mytilus", "Invasive to Tasmania", Invasives$Status)
Invasives$Status<-if_else(Invasives$Genus=="Obelia" | Invasives$Genus=="Metacarcinus" |Invasives$Genus=="Bougainvillia" | Invasives$Genus=="Watersipora" | Invasives$Genus=="Patiriella" | Invasives$Genus=="Bugulina" | Invasives$Genus=="Cryptosula", "Introduced to Tasmania", Invasives$Status)
Invasives$Status<-if_else(Invasives$Genus=="Arachnopusia" | Invasives$Genus=="Ostrea", "Native in Tasmania", Invasives$Status)
#long format for ggplot
LongInvasives<-pivot_longer(Invasives, cols = starts_with("V"), names_to = "Sample", values_to = "Reads")
LongInvasives<-merge(LongInvasives, samples[samples$Method=="eDNA",], all=TRUE)
LongInvasives<-LongInvasives %>% select(Zone, Sample, eDNA_Pair, Type, Lat, Genus, Species, Status, Reads)
#write.xlsx(as.data.frame(LongInvasives), file="OverallInvasivesData.xlsx", sheetName = "RawData") #export raw
LongInvasives$Reads[LongInvasives$Reads=="0"]<-NA #change 0's to NA>nicer on plot
#plot individually for proportion
Amphinema<-droplevels(subset(LongInvasives, Genus=="Amphinema"))
sum(Amphinema$Reads, na.rm=T)## [1] 18
Amphinema$Proportion<-(Amphinema$Reads/18)*100 #calculation proportion reads per sample
Arach<-droplevels(subset(LongInvasives, Genus=="Arachnopusia"))
sum(Arach$Reads, na.rm=T)## [1] 29
Arach$Proportion<-(Arach$Reads/29)*100 #calculation proportion reads per sample
Aster<-droplevels(subset(LongInvasives, Genus=="Asterias"))
sum(Aster$Reads, na.rm=T)## [1] 78
Aster$Proportion<-(Aster$Reads/78)*100 #calculation proportion reads per sample
Austrominius<-droplevels(subset(LongInvasives, Genus=="Austrominius"))
sum(Austrominius$Reads, na.rm=T)## [1] 121
Austrominius$Proportion<-(Austrominius$Reads/121)*100 #calculation proportion reads per sample
Bougainvillia<-droplevels(subset(LongInvasives, Genus=="Bougainvillia"))
sum(Bougainvillia$Reads, na.rm=T)## [1] 1567
Bougainvillia$Proportion<-(Bougainvillia$Reads/1567)*100 #calculation proportion reads per sample
BugFlab<-droplevels(subset(LongInvasives, Genus=="Bugulina"))
sum(BugFlab$Reads, na.rm=T)## [1] 72755
BugFlab$Proportion<-(BugFlab$Reads/72755)*100 #calculation proportion reads per sample
Clytia<-droplevels(subset(LongInvasives, Genus=="Clytia"))
sum(Clytia$Reads, na.rm=T)## [1] 20
Clytia$Proportion<-(Clytia$Reads/20)*100 #calculation proportion reads per sample
Coryne<-droplevels(subset(LongInvasives, Genus=="Coryne"))
sum(Coryne$Reads, na.rm=T)## [1] 5908
Coryne$Proportion<-(Coryne$Reads/5908)*100 #calculation proportion reads per sample
Cryp<-droplevels(subset(LongInvasives, Genus=="Cryptosula"))
sum(Cryp$Reads, na.rm=T)## [1] 69
Cryp$Proportion<-(Cryp$Reads/69)*100 #calculation proportion reads per sample
Lizzia<-droplevels(subset(LongInvasives, Genus=="Lizzia"))
sum(Lizzia$Reads, na.rm=T)## [1] 287
Lizzia$Proportion<-(Lizzia$Reads/287)*100 #calculation proportion reads per sample
Metacarcinus<-droplevels(subset(LongInvasives, Genus=="Metacarcinus"))
sum(Metacarcinus$Reads, na.rm=T)## [1] 5
Metacarcinus$Proportion<-(Metacarcinus$Reads/5)*100 #calculation proportion reads per sample
#low read numbers: the sample had 16130 reads>using species specific would be better but impressive to still detect low read numbers
Mytilus<-droplevels(subset(LongInvasives, Genus=="Mytilus"))
sum(Mytilus$Reads, na.rm=T)## [1] 22
Mytilus$Proportion<-(Mytilus$Reads/22)*100 #calculation proportion reads per sample
Obelia<-droplevels(subset(LongInvasives, Genus=="Obelia"))
sum(Obelia$Reads, na.rm=T)## [1] 1143
Obelia$Proportion<-(Obelia$Reads/1143)*100 #calculation proportion reads per sample
Ostrea<-droplevels(subset(LongInvasives, Genus=="Ostrea"))
sum(Ostrea$Reads, na.rm=T)## [1] 314
Ostrea$Proportion<-(Ostrea$Reads/314)*100 #calculation proportion reads per sample
Patiriella<-droplevels(subset(LongInvasives, Genus=="Patiriella"))
sum(Patiriella$Reads, na.rm=T)## [1] 26
Patiriella$Proportion<-(Patiriella$Reads/26)*100 #calculation proportion reads per sample
Water<-droplevels(subset(LongInvasives, Genus=="Watersipora"))
sum(Water$Reads, na.rm=T)## [1] 154
Water$Proportion<-(Water$Reads/154)*100 #calculation proportion reads per sample
#plot time!
#figures not displayed as they warp
ggplot(Amphinema,aes(x=factor(Amphinema$eDNA_Pair, levels=unique(Amphinema$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
panel.grid.major.x = element_blank(),
panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "Amphinema.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Arach,aes(x=factor(Arach$eDNA_Pair, levels=unique(Arach$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
panel.grid.major.x = element_blank(),
#panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.text.y = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "Arach.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Aster,aes(x=factor(Aster$eDNA_Pair, levels=unique(Aster$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
panel.grid.major.x = element_blank(),
panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "Aster.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Austrominius,aes(x=factor(Austrominius$eDNA_Pair, levels=unique(Austrominius$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
panel.grid.major.x = element_blank(),
# panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
axis.text.y = element_blank(),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "Austro.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Bougainvillia,aes(x=factor(Bougainvillia$eDNA_Pair, levels=unique(Bougainvillia$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "Boug.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(BugFlab,aes(x=factor(BugFlab$eDNA_Pair, levels=unique(BugFlab$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
#panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "bugflab.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Clytia,aes(x=factor(Clytia$eDNA_Pair, levels=unique(Clytia$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "clytia.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Coryne,aes(x=factor(Coryne$eDNA_Pair, levels=unique(Coryne$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
# panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "coryne.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Cryp,aes(x=factor(Cryp$eDNA_Pair, levels=unique(Cryp$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "cryp.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Lizzia,aes(x=factor(Lizzia$eDNA_Pair, levels=unique(Lizzia$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
#panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "lizzia.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Metacarcinus,aes(x=factor(Metacarcinus$eDNA_Pair, levels=unique(Metacarcinus$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "Metac.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Mytilus,aes(x=factor(Mytilus$eDNA_Pair, levels=unique(Mytilus$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
# panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "Mytilus.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Obelia,aes(x=factor(Obelia$eDNA_Pair, levels=unique(Obelia$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "obelia.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Ostrea,aes(x=factor(Ostrea$eDNA_Pair, levels=unique(Ostrea$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
# panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "ostrea.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Patiriella,aes(x=factor(Patiriella$eDNA_Pair, levels=unique(Patiriella$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
axis.text.x = element_blank())+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "patir.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)
ggplot(Water,aes(x=factor(Water$eDNA_Pair, levels=unique(Water$eDNA_Pair)), y=Type))+
geom_point(aes(size=Proportion, shape=Type, color=Status, fill=Status), stroke=1.2, alpha = 0.5)+
theme_bw() +
theme(legend.position="none",axis.line = element_blank(), axis.ticks = element_blank(),
axis.text.y = element_blank(),
panel.grid.major.x = element_blank(),
# panel.background = element_rect(fill=alpha("#F2E9E2",0.4)),
panel.border = element_blank(),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
panel.grid.major.y = element_line(colour="#eeeeee"),
#axis.text.x = element_blank())+
axis.text.x = element_text(colour="black", size = 8, angle = 90))+
scale_x_discrete(expand = c(0.03, 0))+
scale_colour_manual(values=InvasivesCol)+
scale_fill_manual(values=InvasivesCol)+
scale_shape_manual(values=eDNAshape)+
scale_size(limits = c(0,100), breaks=c(20, 40, 60, 80, 100),range = c(1, 12)) #ggsave(filename = "watersip.jpeg", plot = last_plot(), width = 18, height = 2.7, units = "cm", dpi = 600)1.13.2 Summary and statistics
#make summary table
InvasiveSummary<-Invasives
InvasiveSummary$Sum<-rowSums(InvasiveSummary[7:136])
InvasiveSummary<-InvasiveSummary %>% select(-starts_with("V"))
#write.xlsx(as.data.frame(InvasiveSummary), file="OverallInvasivesData.xlsx", sheetName = "OverallSummary", append = TRUE)
##write.csv(InvasiveSummary, "InvasiveSummary.csv")
#compare detections between 12L and 2L
InvasivesComparison<-LongInvasives %>% select(-Lat, -Genus, -Status) %>% group_by(Zone, Sample, eDNA_Pair, Type, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
#total reads per volume
sum(InvasivesComparison$Reads[InvasivesComparison$Type=="12L"])## [1] 75866
sum(InvasivesComparison$Reads[InvasivesComparison$Type=="2L"])## [1] 6650
#t-test for reads
ttestinv<-InvasivesComparison %>% group_by(Sample, eDNA_Pair, Type) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
wilcox.test(ttestinv$Reads~ttestinv$Type, mu=0, alt="two.sided",
conf.level=0.95, paired=TRUE)##
## Wilcoxon signed rank test with continuity correction
##
## data: ttestinv$Reads by ttestinv$Type
## V = 560.5, p-value = 0.005743
## alternative hypothesis: true location shift is not equal to 0
tapply(ttestinv$Reads,ttestinv$Type,median)## 12L 2L
## 6 28
# tapply(InvasivesComparison$Reads,InvasivesComparison$Type,shapiro.test) #not normal
# wilcox.test(InvasivesComparison$Reads~InvasivesComparison$Type, mu=0, alt="two.sided",
# conf.level=0.95, paired=TRUE)
# tapply(InvasivesComparison$Reads,InvasivesComparison$Type,median)
#occurences
InvasivesComparison$Frequency<-0
InvasivesComparison$Frequency[InvasivesComparison$Reads>0]<-1
#how many total detections along the transect per volume
InvasivesVolume<-as.data.frame(with(InvasivesComparison, table(Type, Frequency)))
InvasivesVolume<-droplevels(subset(InvasivesVolume, Frequency==1))
InvasivesVolume## Type Frequency Freq
## 3 12L 1 140
## 4 2L 1 180
#Zone and vol frequency
InvasivesVolumeZone<-as.data.frame(with(InvasivesComparison, table(Type, Zone, Frequency)))
InvasivesVolumeZone<-droplevels(subset(InvasivesVolumeZone, Frequency==1))
InvasivesVolumeZone## Type Zone Frequency Freq
## 11 12L AAZ 1 51
## 12 2L AAZ 1 75
## 13 12L PFZ 1 13
## 14 2L PFZ 1 18
## 15 12L SACCZ 1 9
## 16 2L SACCZ 1 19
## 17 12L SAZ 1 33
## 18 2L SAZ 1 38
## 19 12L STZ 1 34
## 20 2L STZ 1 30
#number of unique species detected with each volume
InvasivesComparison<-InvasivesComparison %>% group_by(Type, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
InvasivesComparison$PA[InvasivesComparison$Frequency>0]<-1
with(InvasivesComparison, table
(Type, PA))## PA
## Type 1
## 12L 16
## 2L 14
#two less species detected with 2L samples
#they are:
unique(InvasivesComparison$Species[InvasivesComparison$Reads=="0"])## [1] "Metacarcinus novaezelandiae" "Patiriella regularis"
#seastar and crab, 'larger' taxa both detected in the first sample/as we depart>in derwent
#these are paired samples 2 & 3
#sums and metazoan proportions
#12l
sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$eDNA_Pair=="1"]) ## [1] 16130
sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$eDNA_Pair=="1" & long_raw$Subkingdom=="Metazoa"])/sum(long_raw$Reads[long_raw$Type=="12L" & long_raw$eDNA_Pair=="1"]) ## [1] 0.7293862
#2L
sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$eDNA_Pair=="1"])## [1] 8229
sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$eDNA_Pair=="1" & long_raw$Subkingdom=="Metazoa"])/sum(long_raw$Reads[long_raw$Type=="2L" & long_raw$eDNA_Pair=="1"]) ## [1] 0.5698141
#12L had higher read numbers (16130) and proportion of metazoan reads (72.94%) compared to 2L (8229, 56.98%)
#again more than likely due to the filter type
#interestingly, although 2L less species, it detected 'invasives' more frequently>so perhaps smaller filter size is better for small-bodied?1.14 Analysis without invasives
1.14.1 Hills numbers
1.14.1.1 Plot
#select species identified as invasive, pest, or biofouling
spcs_noinv<-species[!species$Species %in% Invasives$Species,]
#merge species assignments by species (i.e unique)
spcs_noinvmerged<-spcs_noinv %>% select(-zOTU, -Comment, -PercentID, -Assignment)
spcs_noinvmerged<-spcs_noinvmerged %>% group_by(Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
#12L only
spcs_noinv12<-cbind(spcs_noinvmerged[1:6], spcs_noinvmerged[as.character(samples$Sample[samples$Type=="12L"])])
spcs_noinv12<-spcs_noinv12[rowSums(spcs_noinv12[,7:71])!=0,]
#2L only
spcs_noinv2<-cbind(spcs_noinvmerged[1:6], spcs_noinvmerged[as.character(samples$Sample[samples$Type=="2L"])])
spcs_noinv2<-spcs_noinv2[rowSums(spcs_noinv2[,7:71])!=0,]
#eDNA 12L hills data
hillinv_0 <- hill_div(spcs_noinv12[7:71], qvalue = 0)
hillinv_1 <- hill_div(spcs_noinv12[7:71], qvalue = 1)
hillinv_2 <- hill_div(spcs_noinv12[7:71], qvalue = 2)
hillinv12 <- cbind("Sample"=names(hillinv_0), "Type"="12L", hillinv_0, hillinv_1, hillinv_2) #keep names to check the samples are correct
hillinv12<-merge(samples[samples$Type=="12L",], hillinv12, all=TRUE)
hillinv12 <- hillinv12 %>% mutate_at(c('hillinv_0', 'hillinv_1', 'hillinv_2'), as.numeric)
#2L hills data
hillinv_0 <- hill_div(spcs_noinv2[7:71], qvalue = 0)
hillinv_1 <- hill_div(spcs_noinv2[7:71], qvalue = 1)
hillinv_2 <- hill_div(spcs_noinv2[7:71], qvalue = 2)
hillinv2 <- cbind("Sample"=names(hillinv_0),"Type"="2L", hillinv_0, hillinv_1, hillinv_2) #keep names to check the samples are correct
hillinv2<-merge(samples[samples$Type=="2L",], hillinv2, all=TRUE)
hillinv2 <- hillinv2 %>% mutate_at(c('hillinv_0', 'hillinv_1', 'hillinv_2'), as.numeric)
#merge together
hillsinv<-merge(hillinv12, hillinv2, all=TRUE)
hillsinv$Zone<-factor(hillsinv$Zone, levels=c("STZ", "SAZ", "PFZ","AAZ", "SACCZ"))
#seperate plots for each hillsinv number
plothillinv_0 <- violin.plot(hillsinv, ydata = hillsinv$hillinv_0, col = vennfill, fill = vennfill, ylab = "Hills number (q = 0)", ylow = 0, yhigh = 14)+ theme_light()+
theme(plot.margin = unit(c(1,1,1,1), "cm"), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
theme(axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")
plothillinv_1 <- violin.plot(hillsinv, ydata = hillsinv$hillinv_1, col = vennfill, fill = vennfill, ylab = "Hills number (q = 1)", ylow = 0, yhigh = 5.5)+ theme_light()+
theme(plot.margin = unit(c(1,1,1,1), "cm"), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
theme(axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")
plothillinv_2 <- violin.plot(hillsinv, ydata = hillsinv$hillinv_2, col = vennfill, fill = vennfill, ylab = "Hills number (q = 2)", ylow = 0, yhigh = 4.5)+ theme_light()+
theme(plot.margin = unit(c(1,1,1,1), "cm"), axis.text.x = element_blank(), strip.text.x = element_text(size=16, face="bold", colour="black"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18))+
theme(axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))+ facet_grid(~Zone, scales = "free_x", space = "free_x")
#combine into one plot and...plot
hillsinvplot<-plot_grid(plothillinv_0, plothillinv_1, plothillinv_2, ncol=1, labels = c("A", "B", "C"), label_size = 25)
plot(hillsinvplot)#tidy env
rm(hillinv_0, hillinv_1, hillinv_2)1.14.1.2 Summary stats & t-tests
#select hillsinv numbers
hillsinvummstat<-droplevels(subset(hillsinv, hillinv_1!="NA"))
hillsinvummstat<-hillsinvummstat %>% select(Type, Zone, hillinv_0, hillinv_1, hillinv_2)
#summary stats function
hillsinvummstat<-summaryBy(hillsinvummstat[,3:5]~Zone & Type,data=hillsinvummstat,FUN=function(x)
+ {c(mean=mean(x),sum=sum(x),se=sd(x)/sqrt(length(x)))})
#pivot table longer
hillsinvummstat<-hillsinvummstat %>% pivot_longer(!c(Zone, Type), names_to = c("hill",".value"), names_pattern = "(\\d*).([A-Za-z]+)", names_prefix = "hillinv_")
hillsinvummstat## # A tibble: 30 × 6
## Zone Type hill mean sum se
## <fct> <chr> <chr> <dbl> <dbl> <dbl>
## 1 STZ 12L 0 7 42 1.44
## 2 STZ 12L 1 1.57 9.40 0.269
## 3 STZ 12L 2 1.31 7.88 0.184
## 4 STZ 2L 0 3.17 19 1.38
## 5 STZ 2L 1 1.66 9.98 0.445
## 6 STZ 2L 2 1.45 8.68 0.320
## 7 SAZ 12L 0 6.06 103 0.458
## 8 SAZ 12L 1 2.43 41.4 0.294
## 9 SAZ 12L 2 2.01 34.1 0.219
## 10 SAZ 2L 0 3.75 60 0.393
## # ℹ 20 more rows
#write.csv(hillsinvummstat, "hillsumstatinv.csv")1.14.1.3 Richness (q=0)
#test for normality
#2L
tapply(hillinv2$hillinv_0, hillinv2$Zone, shapiro.test) #not normal## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.90965, p-value = 0.03466
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.83643, p-value = 0.02506
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.91393, p-value = 0.4627
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.41746, p-value = 0.0000002785
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.67311, p-value = 0.003179
#12L
tapply(hillinv12$hillinv_0, hillinv12$Zone, shapiro.test) #normal ## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.96531, p-value = 0.554
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.94817, p-value = 0.6104
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.83173, p-value = 0.1112
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.93286, p-value = 0.2429
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.87691, p-value = 0.2552
#wilcox
hillsinv %>% group_by(Zone) %>% wilcox_test(hillinv_0~Type, paired=TRUE)## # A tibble: 5 × 8
## Zone .y. group1 group2 n1 n2 statistic p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 STZ hillinv_0 12L 2L 6 6 21 0.035
## 2 SAZ hillinv_0 12L 2L 17 17 105 0.0111
## 3 PFZ hillinv_0 12L 2L 12 12 66 0.00362
## 4 AAZ hillinv_0 12L 2L 24 24 236. 0.000381
## 5 SACCZ hillinv_0 12L 2L 6 6 16 0.281
hillsinv %>% group_by(Type) %>% wilcox_test(hillinv_0~Zone, paired=FALSE)## # A tibble: 20 × 10
## Type .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 12L hilli… STZ SAZ 6 17 56.5 7.23e-1 7.23e-1 ns
## 2 12L hilli… STZ PFZ 6 12 25.5 3.45e-1 6.9 e-1 ns
## 3 12L hilli… STZ AAZ 6 24 39.5 9.3 e-2 5.6 e-1 ns
## 4 12L hilli… STZ SACCZ 6 6 26 2.2 e-1 6.6 e-1 ns
## 5 12L hilli… SAZ PFZ 17 12 27.5 9.39e-4 8 e-3 **
## 6 12L hilli… SAZ AAZ 17 24 34.5 6.79e-6 6.79e-5 ****
## 7 12L hilli… SAZ SACCZ 17 6 74.5 1.03e-1 5.6 e-1 ns
## 8 12L hilli… PFZ AAZ 12 24 100 1.39e-1 5.6 e-1 ns
## 9 12L hilli… PFZ SACCZ 12 6 69.5 2 e-3 1.2 e-2 *
## 10 12L hilli… AAZ SACCZ 24 6 142. 3.13e-4 3 e-3 **
## 11 2L hilli… STZ SAZ 6 17 33.5 2.28e-1 6.84e-1 ns
## 12 2L hilli… STZ PFZ 6 12 19 1.14e-1 4.56e-1 ns
## 13 2L hilli… STZ AAZ 6 24 17 4 e-3 2.8 e-2 *
## 14 2L hilli… STZ SACCZ 6 6 12.5 4.05e-1 8.1 e-1 ns
## 15 2L hilli… SAZ PFZ 17 12 30.5 1 e-3 1.3 e-2 *
## 16 2L hilli… SAZ AAZ 17 24 53 5.86e-5 5.86e-4 ***
## 17 2L hilli… SAZ SACCZ 17 6 59.5 5.71e-1 8.1 e-1 ns
## 18 2L hilli… PFZ AAZ 12 24 72.5 1.5 e-2 9.2 e-2 ns
## 19 2L hilli… PFZ SACCZ 12 6 60 2.4 e-2 1.22e-1 ns
## 20 2L hilli… AAZ SACCZ 24 6 130 2 e-3 2 e-2 *
#significantly differs in the AAZ, PFZ, SAZ, and STZ1.14.1.4 Shannons diversity (q=1)
#test for normality
#2L
tapply(hillinv2$hillinv_1, hillinv2$Zone, shapiro.test) #not normal in STZ or PFZ## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.95746, p-value = 0.3895
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.75436, p-value = 0.002969
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.87849, p-value = 0.2622
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.91645, p-value = 0.1479
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.70463, p-value = 0.006915
tapply(hillinv2$hillinv_1[hillinv2$Zone=="STZ"], hillinv2$Zone[hillinv2$Zone=="STZ"], hist) #bimodal so will not become normal## $STZ
## $breaks
## [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0
##
## $counts
## [1] 4 0 1 0 0 1
##
## $density
## [1] 1.3333333 0.0000000 0.3333333 0.0000000 0.0000000 0.3333333
##
## $mids
## [1] 1.25 1.75 2.25 2.75 3.25 3.75
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
tapply(hillinv2$hillinv_1[hillinv2$Zone=="PFZ"], hillinv2$Zone[hillinv2$Zone=="PFZ"], hist) #also fairly blocked## $PFZ
## $breaks
## [1] 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
##
## $counts
## [1] 3 5 2 0 0 1 1
##
## $density
## [1] 0.5000000 0.8333333 0.3333333 0.0000000 0.0000000 0.1666667 0.1666667
##
## $mids
## [1] 1.75 2.25 2.75 3.25 3.75 4.25 4.75
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
#12L
tapply(hillinv12$hillinv_1, hillinv12$Zone, shapiro.test) #not normal in SAZ## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.93544, p-value = 0.129
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.95321, p-value = 0.6843
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.83361, p-value = 0.1153
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.89109, p-value = 0.0483
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.79496, p-value = 0.05292
tapply(hillinv12$hillinv_1[hillinv12$Zone=="SAZ"], hillinv12$Zone[hillinv12$Zone=="SAZ"], hist) #will conduct wilcox for not normal zones rather than transform and interpret## $SAZ
## $breaks
## [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5
##
## $counts
## [1] 7 0 1 2 3 2 2
##
## $density
## [1] 0.8235294 0.0000000 0.1176471 0.2352941 0.3529412 0.2352941 0.2352941
##
## $mids
## [1] 1.25 1.75 2.25 2.75 3.25 3.75 4.25
##
## $xname
## [1] "X[[i]]"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
hillsinv %>% group_by(Zone) %>% t_test(hillinv_1~Type, paired=TRUE) #default conf 0.95 and two sided## # A tibble: 5 × 9
## Zone .y. group1 group2 n1 n2 statistic df p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 STZ hillinv_1 12L 2L 6 6 -0.429 5 0.686
## 2 SAZ hillinv_1 12L 2L 17 16 -0.164 15 0.872
## 3 PFZ hillinv_1 12L 2L 12 12 0.986 11 0.345
## 4 AAZ hillinv_1 12L 2L 24 24 0.912 23 0.371
## 5 SACCZ hillinv_1 12L 2L 6 6 -0.461 5 0.664
hillsinv %>% group_by(Zone) %>% wilcox_test(hillinv_1~Type, paired=TRUE)## # A tibble: 5 × 8
## Zone .y. group1 group2 n1 n2 statistic p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 STZ hillinv_1 12L 2L 6 6 10 1
## 2 SAZ hillinv_1 12L 2L 17 16 64 0.86
## 3 PFZ hillinv_1 12L 2L 12 12 52 0.339
## 4 AAZ hillinv_1 12L 2L 24 24 189 0.277
## 5 SACCZ hillinv_1 12L 2L 6 6 8 0.688
#no significant differences1.14.1.5 Simspons diversity (q=2)
#test for normality
#2L
tapply(hillinv2$hillinv_2, hillinv2$Zone, shapiro.test) #only normal in SAZ+SACCZ conduct wilcox## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.89528, p-value = 0.01711
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.75416, p-value = 0.002955
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.84435, p-value = 0.1416
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.90563, p-value = 0.09896
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.68397, p-value = 0.004172
#12L
tapply(hillinv12$hillinv_2, hillinv12$Zone, shapiro.test) #will conduct wilcox as above aren't normal## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.88028, p-value = 0.008418
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.91412, p-value = 0.2409
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.73879, p-value = 0.01544
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.87679, p-value = 0.02822
##
##
## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.71705, p-value = 0.009302
hillsinv %>% group_by(Zone) %>% wilcox_test(hillinv_2~Type, paired=TRUE)## # A tibble: 5 × 8
## Zone .y. group1 group2 n1 n2 statistic p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 STZ hillinv_2 12L 2L 6 6 10 1
## 2 SAZ hillinv_2 12L 2L 17 16 61 0.744
## 3 PFZ hillinv_2 12L 2L 12 12 51 0.38
## 4 AAZ hillinv_2 12L 2L 24 24 156 0.877
## 5 SACCZ hillinv_2 12L 2L 6 6 8 0.688
#no significant differences- same in the STZ1.14.2 Indicator species analysis
1.14.2.1 12L
#12L PA
indicnoinv_12L<-t(spcs_noinv12[7:71])
indicnoinv_12L[indicnoinv_12L>0]<-1 #convert to PA
colnames(indicnoinv_12L)<-spcs_noinv12$Species
#indicnoinv_12L<-multipatt(indicnoinv_12L, samples$Zone[samples$Type=="12L"], func = "IndVal.g",duleg = FALSE, control = how(nperm=9999))
#summary(indicnoinv_12L, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)1.14.2.2 2L
#PA
indicnoinv_2L<-t(spcs_noinv2[7:71])
indicnoinv_2L[indicnoinv_2L>0]<-1 #convert to PA
colnames(indicnoinv_2L)<-spcs_noinv2$Species
#indicnoinv_2L<-multipatt(indicnoinv_2L, samples$Zone[samples$Type=="2L"], func = "IndVal.g",duleg = FALSE, control = how(nperm=9999))
#summary(indicnoinv_2L, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)2 eDNA combined
Combining 12L and 2L and removing hull bound species from analysis.
2.1 Formatting
#remove all besides eDNAdata, eDNAdata_wide, fill12, samples, CPR_eDNAvals, CPRspecies, species_long
rm("Amphinema","Arach", "Aster", "Austrominius", "Bougainvillia", "BugFlab","Clytia", "Coryne", "Cryp", "diversity", "eDNAshape", "exc_sha", "hill12", "hill2", "hillinv12", "hillinv2", "hills", "hillsinv", "hillsplot", "hillsummstat", "hillsinvplot", "hillsinvummstat", "indic_12L", "indic_2L", "indicnoinv_2L", "indicnoinv_12L","InvasivesCol", "InvasivesComparison", "InvasiveSummary", "InvasivesVolume", "InvasivesVolumeZone", "Lizzia", "long_raw", "LongInvasives", "matchedsamples", "Metacarcinus", "mysamples", "Mytilus", "Obelia", "Ostrea", "Patiriella", "plothill_0", "plothill_1", "plothill_2", "plothillinv_0","plothillinv_1", "plothillinv_2", "raweDNA", "rawfill", "rawsummary", "Richness", "spcs_noinv", "spcs_noinv12", "spcs_noinv2", "species_freqplot", "spcs_noinvmerged","species", "species12", "species2", "SpeciesFreq", "speciesmerged","standard_error", "ttestinv", "venn_plot", "Venn12", "Venn2", "vennfill", "Water")#combine 2 and 12L
eDNAmerged<-eDNAdata %>% select(-Sample, -Type, -Metazoan, -PercentID)
eDNAmerged<-eDNAmerged %>% group_by(Method, eDNA_Pair, Zone, CPR_Transect, Latitude, Longitude, CPRSubset, zOTU, Subkingdom, Phylum, Class, Order, Family, Genus, Species, Comment, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)
eDNAmerged$eDNA_Pair<-paste("eDNA",eDNAmerged$eDNA_Pair, sep = "")
eDNAmerged<-eDNAmerged %>% group_by(Method, eDNA_Pair, Zone, CPR_Transect, Latitude, Longitude, CPRSubset, zOTU, Subkingdom,Phylum, Class,Order,Family,Genus, Species,Comment,Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)
#remove invasives/hull species
eDNAmerged<-eDNAmerged[!eDNAmerged$Species %in% Invasives$Species,]
#samples merged by pair
samples_paired<-droplevels(subset(samples, eDNA_Pair!="NA"))
samples_paired<-samples_paired %>% select(-Type,-Sample, -Lat)
samples_paired<-samples_paired %>% distinct()
samples_paired$eDNA_Pair<-paste("eDNA",samples_paired$eDNA_Pair, sep = "")2.2 Summary statistics
#summary stats
sum(eDNAmerged$Reads)## [1] 1057322
#1139972 prior, 1057322 remaining, 82650 reads assigned to hull species
#of the remaining reads, these were assigned to:
sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Species"]) #1036536 98.03%## [1] 1036536
sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Genus"]) #4635 0.004%## [1] 4635
sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Family"]) #15584 0.015%## [1] 15584
sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Order"]) #429 0.0004%## [1] 429
sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Class"]) #16 1.513257e-05%## [1] 16
sum(eDNAmerged$Reads[eDNAmerged$Assignment=="Phyla"]) #122 0.0001153859%## [1] 122
#zotus assigned to species
length(unique(eDNAmerged$zOTU[eDNAmerged$Assignment=="Species"]))/length(unique(eDNAmerged$zOTU)) #76.6%## [1] 0.7558442
#species assignments
species<-droplevels(subset(eDNAmerged, Assignment=="Species"))
length(unique(species$Species)) #52 species detected## [1] 52
length(unique(species$Genus)) #from 38 genera## [1] 38
length(unique(species$Family)) #and 29 families## [1] 29
length(unique(species$Order)) #and 19 orders## [1] 19
2.3 Diversity across zones
#select data and convert abundance to PA
diversity<-species %>% select(-Comment, -zOTU) %>% group_by(Zone, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
diversity$FOO[diversity$Reads>0]<-1
#order zones
diversity$Zone<-factor(diversity$Zone, levels = c("STZ", "SAZ", "PFZ", "AAZ", "SACCZ"))
ggplot(data=diversity, aes(fill=Phylum, y=FOO, x=Zone)) +
geom_bar(stat="identity", width = 0.97) +
scale_fill_manual(values = fill12) +
theme(axis.line = element_line(colour = "black", size = 0.4, linetype = "solid"),
axis.ticks = element_line(colour = "black", size = 0.4, linetype = "solid"),
axis.text.x = element_text(size=11,vjust = 0.5, hjust=0.5, colour="black"),
axis.text.y = element_text(size=16, vjust = 0.5, hjust=1,colour="black"),
axis.title.y = element_text(face="bold", size=15, vjust=3),
strip.text.x = element_text(size = 12, face = "bold"),
axis.title.x = element_text(face="bold", size=13,vjust = -2),
legend.text = element_text(size = 10), legend.title = element_text(size=13, face="bold"),
legend.key.height = unit(0.2, 'cm'), legend.key.width = unit(0.3, 'cm'),
plot.margin = margin(20,20,30,20)) + scale_y_continuous(expand = c(0, 0))+
#scale_x_discrete(labels=stringr::str_wrap(c("12L"="LargeVF", "2L"="SmallVF"),width=5), expand = c(0, 0))+
guides(fill=guide_legend(byrow = TRUE, title="Phylum"))+
labs(x="Southern Ocean zones", y="Species diversity")#ggsave(filename = "CPR_eDNA_diversityzones.jpeg", plot = last_plot(), width = 24, height = 14, units = "cm", dpi = 600)
#as table
with(species_long, table(Phylum, Zone, Type))## , , Type = 12L
##
## Zone
## Phylum STZ SAZ PFZ AAZ SACCZ
## Annelida 0 0 0 0 8
## Arthropoda 483 1450 298 665 53
## Bryozoa 12 9 3 20 4
## Chaetognatha 0 3 1 3 0
## Chordata 2 10 3 6 0
## Cnidaria 39 27 11 50 6
## Echinodermata 7 4 0 4 3
## Mollusca 4 3 12 23 0
## Nemertea 0 0 0 0 0
## Porifera 0 0 0 0 0
##
## , , Type = 2L
##
## Zone
## Phylum STZ SAZ PFZ AAZ SACCZ
## Annelida 0 0 0 0 4
## Arthropoda 61 146 139 398 24
## Bryozoa 16 23 3 29 11
## Chaetognatha 0 1 2 3 0
## Chordata 1 1 0 2 0
## Cnidaria 35 29 20 98 17
## Echinodermata 6 0 0 1 1
## Mollusca 2 2 8 25 1
## Nemertea 0 0 0 0 1
## Porifera 2 0 0 1 0
2.4 Hills numbers
2.4.1 Plot
#merge species assignments by species (i.e unique)
spcs_merged<-species %>% ungroup() %>% select(-zOTU, -Method, -Assignment, -CPR_Transect, -Latitude, -Longitude, -CPRSubset) %>% group_by(eDNA_Pair, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
#wide format species
spcs_wide<-pivot_wider(spcs_merged, names_from = eDNA_Pair, values_from = Reads, values_fill = 0)
#hills data
hill0 <- hill_div(spcs_wide[7:71], qvalue = 0)
hill1 <- hill_div(spcs_wide[7:71], qvalue = 1)
hill2 <- hill_div(spcs_wide[7:71], qvalue = 2)
hill <- cbind.data.frame("eDNA_Pair"=names(hill0), hill0, hill1, hill2)
hill<-left_join(hill, samples_paired)
hill <- hill %>% mutate_at(c('hill0', 'hill1', 'hill2'), as.numeric)
#order zones
hill$Zone<-factor(hill$Zone, levels=c("STZ", "SAZ", "PFZ","AAZ", "SACCZ"))
#seperate plots for each hillsinv number
plot0<- violin.plot2(hill, ydata = hill$hill0, col = zonefill, fill = zonefill, ylab = "Hill number (q = 0)", ylow = 0, yhigh = 14)+ theme_light()+
theme(plot.margin = unit(c(1,1,1,1), "cm"), legend.position="none", axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))
plot1<- violin.plot2(hill, ydata = hill$hill1, col = zonefill, fill = zonefill, ylab = "Hill number (q = 1)", ylow = 0, yhigh = 5.5)+ theme_light()+
theme(plot.margin = unit(c(1,1,1,1), "cm"), legend.position="none", axis.text.x = element_blank(),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))
plot2<- violin.plot2(hill, ydata = hill$hill2, col = zonefill, fill = zonefill, ylab = "Hill number (q = 2)", ylow = 0, yhigh = 4.5)+ theme_light()+
theme(plot.margin = unit(c(1,1,1,1), "cm"), legend.position="none", axis.text.x = element_text(size=16, colour="black"),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))
#combine into one plot and...plot
hillplot<-plot_grid(plot0, plot1, plot2, ncol=1, labels = c("A", "B", "C"), label_size = 25)
plot(hillplot)#get legend
legend<-violin.plot2(hill, ydata = hill$hill2, col = zonefill, fill = zonefill, ylab = "hill number (q = 2)", ylow = 0, yhigh = 5)+ theme_light()+
theme(plot.margin = unit(c(1,1,1,1), "cm"), legend.text = element_text(size = 14), legend.title = element_text(face="bold", size=18), axis.text.x = element_text(size=16, colour="black"),axis.text.y = element_text(size=16, colour="black"),axis.title.y = element_text(size=15, hjust=0.7,vjust=4, face="bold"))
legend<-get_legend(legend)
#plot heatmap and legend together
plot_grid(hillplot, legend, rel_widths = c(5, 1))#tidy env
rm(plot0, plot1, plot2)2.4.2 Summary stats & anova
#summary stats (means se)
#select hills numbers
hillsummstat<-hill %>% select(Zone, hill0, hill1, hill2)
#summary stats function
hillsummstat<-summaryBy(hillsummstat[,2:4]~Zone ,data=hillsummstat,FUN=function(x)
+ {c(mean=mean(x),sum=sum(x),se=sd(x)/sqrt(length(x)))})
#pivot table longer
hillsummstat<-hillsummstat %>% pivot_longer(!c(Zone), names_to = c("hill",".value"), names_pattern = "(\\d*).([A-Za-z]+)", names_prefix = "hill")
hillsummstat## # A tibble: 15 × 5
## Zone hill mean sum se
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 STZ 0 7.5 45 1.73
## 2 STZ 1 1.70 10.2 0.343
## 3 STZ 2 1.39 8.33 0.224
## 4 SAZ 0 6.59 112 0.446
## 5 SAZ 1 2.44 41.5 0.292
## 6 SAZ 2 2.00 34.1 0.217
## 7 PFZ 0 9.67 116 0.432
## 8 PFZ 1 2.83 34.0 0.212
## 9 PFZ 2 2.24 26.9 0.172
## 10 AAZ 0 10.5 251 0.356
## 11 AAZ 1 2.70 64.8 0.182
## 12 AAZ 2 1.98 47.4 0.146
## 13 SACCZ 0 5.33 32 0.843
## 14 SACCZ 1 1.61 9.66 0.335
## 15 SACCZ 2 1.43 8.59 0.303
#write.csv(hillsummstat, "hillsummstat_merged.csv")
#tests for normality
tapply(hill$hill0, hill$Zone, shapiro.test) #normal## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.89303, p-value = 0.3344
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.97126, p-value = 0.8402
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.95309, p-value = 0.6825
##
##
## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.91605, p-value = 0.04779
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.91824, p-value = 0.4928
tapply(hill$hill1, hill$Zone, shapiro.test) #normal## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.81515, p-value = 0.08009
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.89414, p-value = 0.05425
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.9524, p-value = 0.6722
##
##
## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.9705, p-value = 0.6794
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.77812, p-value = 0.03702
tapply(hill$hill2, hill$Zone, shapiro.test) #not normal## $STZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.75516, p-value = 0.02237
##
##
## $SAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.87902, p-value = 0.03066
##
##
## $PFZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.91424, p-value = 0.2417
##
##
## $AAZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.8801, p-value = 0.00835
##
##
## $SACCZ
##
## Shapiro-Wilk normality test
##
## data: X[[i]]
## W = 0.66766, p-value = 0.00277
hist(hill$hill2) #positive skew#variance
library(car)
leveneTest(hill0~Zone, data=hill) #not equal## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 3.9689 0.006359 **
## 60
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
leveneTest(hill1~Zone, data=hill) # equal## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 1.8347 0.1339
## 60
leveneTest(hill2~Zone, data=hill) # equal## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 1.5555 0.1979
## 60
#kruskal test
kruskal.test(hill0~Zone, data=hill)##
## Kruskal-Wallis rank sum test
##
## data: hill0 by Zone
## Kruskal-Wallis chi-squared = 30.693, df = 4, p-value = 0.000003536
kruskal.test(hill1~Zone, data=hill) ##
## Kruskal-Wallis rank sum test
##
## data: hill1 by Zone
## Kruskal-Wallis chi-squared = 11.458, df = 4, p-value = 0.02187
kruskal.test(hill2~Zone, data=hill)##
## Kruskal-Wallis rank sum test
##
## data: hill2 by Zone
## Kruskal-Wallis chi-squared = 10.862, df = 4, p-value = 0.02816
#anova for hill1
hill1lm<-lm(hill1~Zone, data=hill)
anova(hill1lm)## Analysis of Variance Table
##
## Response: hill1
## Df Sum Sq Mean Sq F value Pr(>F)
## Zone 4 10.812 2.70294 2.9818 0.02595 *
## Residuals 60 54.389 0.90648
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#similar p value use kruskal for all for consistencys2.5 Indicator species analysis
#12L PA
indic<-t(spcs_wide[7:71])
indic[indic>0]<-1 #convert to PA
colnames(indic)<-spcs_wide$Species
#indic<-multipatt(indic, samples_paired$Zone, func = "IndVal.g",duleg = FALSE, control = how(nperm=9999))
#summary(indic, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)
#takes ages to run so pasted below instead
# Multilevel pattern analysis
# ---------------------------
#
# Association function: IndVal.g
# Significance level (alpha): 0.05
#
# Total number of species: 52
# Selected number of species: 16
# Number of species associated to 1 group: 6
# Number of species associated to 2 groups: 5
# Number of species associated to 3 groups: 5
# Number of species associated to 4 groups: 0
#
# List of species associated to each combination:
#
# Group PFZ #sps. 1
# A B stat p.value
# Vibilia antarctica 0.7273 0.3333 0.492 0.0474 *
#
# Group SACCZ #sps. 4
# A B stat p.value
# Stephos longipes 0.6667 0.5000 0.577 0.0085 **
# Polyeunoa laevis 1.0000 0.3333 0.577 0.0148 *
# Sterechinus neumayeri 1.0000 0.3333 0.577 0.0148 *
# Nematoscelis megalops 0.8889 0.3333 0.544 0.0190 *
#
# Group STZ #sps. 1
# A B stat p.value
# Evadne nordmanni 0.7391 0.3333 0.496 0.0404 *
#
# Group AAZ+PFZ #sps. 3
# A B stat p.value
# Calanoides acutus 0.8815 0.8611 0.871 1e-04 ***
# Rhincalanus gigas 0.9233 0.6667 0.785 2e-04 ***
# Calanus propinquus 0.8281 0.7222 0.773 7e-04 ***
#
# Group AAZ+SACCZ #sps. 1
# A B stat p.value
# Thysanoessa macrura 0.9200 0.5667 0.722 0.0011 **
#
# Group SAZ+STZ #sps. 1
# A B stat p.value
# Euphausia vallentini 0.8223 0.3478 0.535 0.045 *
#
# Group AAZ+PFZ+SAZ #sps. 2
# A B stat p.value
# Calanus propinquus/similimus 1.0000 0.8113 0.901 1e-04 ***
# Metridia lucens 0.9321 0.7358 0.828 8e-04 ***
#
# Group SACCZ+SAZ+STZ #sps. 3
# A B stat p.value
# Neocalanus tonsus 0.8265 0.8276 0.827 1e-04 ***
# Clausocalanus brevipes 0.8674 0.4828 0.647 1e-02 **
# Clausocalanus pergens 1.0000 0.2414 0.491 5e-02 *
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 2.6 PERMANOVA
#make a lat5
Lat5<-read.csv("sampslat5.csv")
Lat5<-Lat5 %>% select(-Zone)
Lat5<-left_join(Lat5, samples, by="Sample")
Lat5$eDNA_Pair<-paste("eDNA",Lat5$eDNA_Pair, sep = "")
Lat5<-Lat5 %>% select(eDNA_Pair, Lat5) %>% distinct()
samples_paired<-left_join(samples_paired, Lat5, by="eDNA_Pair")
#convert to PA
PA_matrix<-spcs_wide %>% ungroup() %>% select(-Phylum, -Class, -Order, -Family, -Genus)
PA_matrix<-pivot_longer(PA_matrix, cols = 2:66, names_to = "eDNA_Pair", values_to = "PA")
PA_matrix<-pivot_wider(PA_matrix, names_from = Species, values_from = PA, values_fill = 0)
PA_matrix<-as.data.frame(PA_matrix)
rownames(PA_matrix)<-PA_matrix$eDNA_Pair
PA_matrix<-PA_matrix %>% select(-eDNA_Pair)
adonis2(PA_matrix ~ Zone, data=samples_paired, method="jaccard", permutations=9999)## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = PA_matrix ~ Zone, data = samples_paired, permutations = 9999, method = "jaccard")
## Df SumOfSqs R2 F Pr(>F)
## Zone 4 3.9924 0.16406 2.9439 0.0001 ***
## Residual 60 20.3423 0.83594
## Total 64 24.3347 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
2.7 nMDS
#get jaccard matrix for nMDS plot
jaccard<-vegdist(PA_matrix, method = "jaccard")
jaccard<-as.matrix(jaccard, labels=TRUE)
#run nMDS
nMDS <- metaMDS(jaccard, k=3, distance = "jaccard", autotransform = FALSE, trymax = 10000)## Run 0 stress 0.1079726
## Run 1 stress 0.1007289
## ... New best solution
## ... Procrustes: rmse 0.08151526 max resid 0.3444544
## Run 2 stress 0.1056801
## Run 3 stress 0.1061836
## Run 4 stress 0.1050821
## Run 5 stress 0.1025566
## Run 6 stress 0.103335
## Run 7 stress 0.1027071
## Run 8 stress 0.1046893
## Run 9 stress 0.1042888
## Run 10 stress 0.1066247
## Run 11 stress 0.1081116
## Run 12 stress 0.1027057
## Run 13 stress 0.1123301
## Run 14 stress 0.1094867
## Run 15 stress 0.1097408
## Run 16 stress 0.1032063
## Run 17 stress 0.1007189
## ... New best solution
## ... Procrustes: rmse 0.001499893 max resid 0.007132327
## ... Similar to previous best
## Run 18 stress 0.1007191
## ... Procrustes: rmse 0.00003739295 max resid 0.0001685826
## ... Similar to previous best
## Run 19 stress 0.1081479
## Run 20 stress 0.1079959
## *** Best solution repeated 2 times
#stress=0.1007196
nMDS <- metaMDS(jaccard, previous = nMDS)## Starting from 3-dimensional configuration
## Stress differs from 'previous.best': reset tries
## Run 0 stress 0.1980241
## Run 1 stress 0.1513518
## ... New best solution
## ... Procrustes: rmse 0.08573331 max resid 0.3310584
## Run 2 stress 0.1583389
## Run 3 stress 0.1474427
## ... New best solution
## ... Procrustes: rmse 0.06298191 max resid 0.4237687
## Run 4 stress 0.1450595
## ... New best solution
## ... Procrustes: rmse 0.06172505 max resid 0.2872753
## Run 5 stress 0.1527898
## Run 6 stress 0.1425149
## ... New best solution
## ... Procrustes: rmse 0.03694813 max resid 0.2233
## Run 7 stress 0.1726683
## Run 8 stress 0.1603489
## Run 9 stress 0.1485234
## Run 10 stress 0.1628316
## Run 11 stress 0.1425145
## ... New best solution
## ... Procrustes: rmse 0.0001571397 max resid 0.001135846
## ... Similar to previous best
## Run 12 stress 0.1672463
## Run 13 stress 0.1639642
## Run 14 stress 0.1436714
## Run 15 stress 0.1568015
## Run 16 stress 0.1645933
## Run 17 stress 0.1589634
## Run 18 stress 0.1580686
## Run 19 stress 0.1585105
## Run 20 stress 0.1502189
## *** Best solution repeated 1 times
# Next, extract the NMDS scores
scrs <- scores(nMDS, display = 'sites')
scrs<-as.data.frame(scrs)
scrs$eDNA_Pair<-rownames(scrs)
scrs <- merge(scrs, samples_paired, all=TRUE)
# Now we compute the group centroids/mean of each zone:
cent <- aggregate(cbind(NMDS1, NMDS2) ~ Zone, data = scrs, FUN = mean)
# To draw the spider, we need to us geom_segment() which requires coordiates to draw the segment from and to our centroids. So we need to replicate the group centroid for each observation in the group. This we facilitate by a left join via merge:
segs <- merge(scrs, setNames(cent, c('Zone','oNMDS1','oNMDS2')),
by = 'Zone', sort = FALSE)
#rename the columns in cent eg oNMDS1 so these don't get confused for columns of the same names in scrs — we want these centroid variables to have different names
#plot
ggplot(scrs, aes(x = NMDS1, y = NMDS2, colour = Zone)) +
geom_segment(data = segs, mapping = aes(xend = oNMDS1, yend = oNMDS2)) + # spiders
geom_point(data = cent, size = 5, aes(fill=Zone), colour="black", stroke=0.2, shape=21) + # centroids
geom_point(data=scrs, size=5, colour="black", aes(fill=Zone), stroke=0.2, shape=21) +
#geom_point(data=scrs, size=4, aes(fill=Zone,colour=Zone)) + # sample scores
scale_fill_manual(values=zonefill)+
scale_colour_manual(values=zonefill)+
#scale_shape_manual(values=c(21))+
theme(axis.text.x = element_text(size=18,colour="black"),
axis.text.y = element_text(size=18,colour="black"),
axis.title.y = element_text(size=18, vjust=3, face="bold"),
axis.title.x = element_text(size=18,vjust = -2, face="bold"),
panel.grid = element_blank(),
axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
panel.grid.major = element_blank(), #remove major gridlines
legend.background = element_rect(fill='transparent'), #transparent legend bg
legend.box.background = element_rect(fill='transparent'),
plot.margin = margin(20,20,30,20),
panel.grid.minor = element_blank(), #remove minor gridlines
panel.border = element_blank()) +
geom_text(label=scrs$eDNA_Pair,
check_overlap = T)ggsave(filename = "ggborders1.png", plot = last_plot(), bg='transparent', width = 34, height = 18, units = "cm", dpi = 600)
# #obtain surf data
# species.scores1 <- as.data.frame(scores(nMDS, "species"))
# species.scores1$species <- rownames(species.scores1)
# names(species.scores1)[c(1, 2)] <- c("x", "y")
# species.scores1$z <- NA
#
# head(species.scores1)
newsf <- ordisurf(nMDS ~ as.numeric(samples_paired$Latitude), plot = FALSE, scaling = 3)
head(newsf)## $coefficients
## (Intercept) s(x1,x2).1 s(x1,x2).2 s(x1,x2).3 s(x1,x2).4 s(x1,x2).5
## -54.2160062 3.3105151 3.4175740 -2.5537027 0.7469594 1.6357877
## s(x1,x2).6 s(x1,x2).7 s(x1,x2).8 s(x1,x2).9
## -0.7631287 -4.4193811 -1.4355797 3.0602435
##
## $residuals
## 1 2 3 4 5 6
## 2.73373948 12.31132221 3.80505891 5.26650986 7.32699990 1.62833176
## 7 8 9 10 11 12
## -0.33713747 3.27715463 0.07237837 3.16888039 1.03285881 0.78393704
## 13 14 15 16 17 18
## 0.74185857 1.93967144 -1.63873930 5.98502696 3.35308168 2.09012973
## 19 20 21 22 23 24
## 1.54824317 4.25622029 0.31466661 6.69537612 5.44395990 2.23598993
## 25 26 27 28 29 30
## 1.71665024 2.41908146 3.64889197 2.14559315 0.44016165 2.07047101
## 31 32 33 34 35 36
## 1.03842776 1.81790937 4.38390540 1.05545414 0.77976891 -0.12823240
## 37 38 39 40 41 42
## -0.44859529 -0.93452281 -1.23944470 -2.25983636 -2.27657674 -0.71856240
## 43 44 45 46 47 48
## -2.06161614 1.32399152 -4.87858496 -3.81828066 -2.84863759 -0.28889367
## 49 50 51 52 53 54
## 1.89819925 1.05809576 0.74421234 -9.67205533 -4.87599303 -2.31656097
## 55 56 57 58 59 60
## -4.96118410 -6.21158311 -4.62550067 -5.03573498 -5.47235844 -0.76086232
## 61 62 63 64 65
## -2.40627262 -0.45946916 -8.94992402 -11.88724102 -11.03980943
##
## $fitted.values
## 1 2 3 4 5 6 7 8
## -45.61564 -55.19342 -47.93546 -49.61751 -51.85810 -46.79883 -45.78326 -49.55545
## 9 10 11 12 13 14 15 16
## -46.46158 -49.73908 -48.29426 -48.23444 -48.54226 -50.15997 -46.74246 -55.12273
## 17 18 19 20 21 22 23 24
## -52.64078 -51.58063 -51.65524 -54.40722 -50.75437 -57.61318 -56.73296 -53.64649
## 25 26 27 28 29 30 31 32
## -53.30695 -54.44398 -56.19749 -55.62189 -53.67056 -55.35747 -54.53533 -55.93841
## 33 34 35 36 37 38 39 40
## -58.64361 -55.48565 -55.33987 -55.03277 -54.82140 -54.56408 -54.68236 -53.94776
## 41 42 43 44 45 46 47 48
## -54.14022 -55.85194 -54.90478 -58.58449 -52.45462 -54.58252 -55.30146 -58.02751
## 49 50 51 52 53 54 55 56
## -60.30900 -60.05820 -59.88511 -49.75834 -54.87191 -58.07374 -55.91902 -55.19892
## 57 58 59 60 61 62 63 64
## -57.76510 -57.53467 -58.10924 -63.51534 -61.90643 -63.96133 -57.21098 -54.44316
## 65
## -55.39749
##
## $family
##
## Family: gaussian
## Link function: identity
##
##
## $linear.predictors
## 1 2 3 4 5 6 7 8
## -45.61564 -55.19342 -47.93546 -49.61751 -51.85810 -46.79883 -45.78326 -49.55545
## 9 10 11 12 13 14 15 16
## -46.46158 -49.73908 -48.29426 -48.23444 -48.54226 -50.15997 -46.74246 -55.12273
## 17 18 19 20 21 22 23 24
## -52.64078 -51.58063 -51.65524 -54.40722 -50.75437 -57.61318 -56.73296 -53.64649
## 25 26 27 28 29 30 31 32
## -53.30695 -54.44398 -56.19749 -55.62189 -53.67056 -55.35747 -54.53533 -55.93841
## 33 34 35 36 37 38 39 40
## -58.64361 -55.48565 -55.33987 -55.03277 -54.82140 -54.56408 -54.68236 -53.94776
## 41 42 43 44 45 46 47 48
## -54.14022 -55.85194 -54.90478 -58.58449 -52.45462 -54.58252 -55.30146 -58.02751
## 49 50 51 52 53 54 55 56
## -60.30900 -60.05820 -59.88511 -49.75834 -54.87191 -58.07374 -55.91902 -55.19892
## 57 58 59 60 61 62 63 64
## -57.76510 -57.53467 -58.10924 -63.51534 -61.90643 -63.96133 -57.21098 -54.44316
## 65
## -55.39749
##
## $deviance
## [1] 1178.947
extract.xyz <- function(obj) {
xy <- expand.grid(x = obj$grid$x, y = obj$grid$y)
xyz <- cbind(xy, c(obj$grid$z))
names(xyz) <- c("x", "y", "z")
return(xyz)
}
contour.vals1 <- extract.xyz(obj = newsf)
head(contour.vals1)## x y z
## 1 -0.5785276 -0.4212773 NA
## 2 -0.5519046 -0.4212773 NA
## 3 -0.5252815 -0.4212773 NA
## 4 -0.4986585 -0.4212773 NA
## 5 -0.4720354 -0.4212773 NA
## 6 -0.4454123 -0.4212773 NA
p1 <- ggplot(data = contour.vals1, aes(x, y, z = z)) +
stat_contour(aes(colour = ..level..)) +
coord_cartesian(xlim = c(-0.6, 0.2), ylim = c(-0.4, 0.2)) +
#scale_shape_manual(values=c(21))+
theme(axis.text.x = element_text(size=14,colour="black"),
axis.text.y = element_text(size=14,colour="black"),
axis.title = element_text(size=18,colour="black"),
panel.grid = element_blank(),
axis.line = element_line(colour = "black", linewidth = 0.4, linetype = "solid"),
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
panel.grid.major = element_blank(), #remove major gridlines
legend.background = element_rect(fill='transparent'), #transparent legend bg
legend.box.background = element_rect(fill='transparent'),
panel.grid.minor = element_blank(), #remove minor gridlines
panel.border = element_blank())
p1ggsave(filename = "ggsurf.png", plot = last_plot(), bg='transparent', width = 30, height = 18, units = "cm", dpi = 600)
#overlay in photoshop
# spcs_wide<-pivot_wider(spcs_merged, names_from = eDNA_Pair, values_from = Reads, values_fill = 0)
# adonis(Gcomm2 ~ Lat5, data = Gdat2, permutations = 10000)
#make jaccard matrix3 CPR
3.1 Overview of CPR detections
#raw CPR and metazoan only
CPR<-as.data.frame(read_excel("eDNA_CPR.xlsx",sheet="CPR"))
#total counts
sum(CPR$Sum)## [1] 55465
#Metazoan/non-metazoan counts/abundance
sum(CPR$Sum[CPR$Subkingdom!="Metazoan"])## [1] 7183
sum(CPR$Sum[CPR$Subkingdom!="Metazoan"])/sum(CPR$Sum)## [1] 0.1295051
sum(CPR$Sum[CPR$Subkingdom=="Metazoan"])/sum(CPR$Sum)## [1] 0.8704949
#drop to metazoan taxa
CPR<-droplevels(subset(CPR, Subkingdom=="Metazoan"))
#Metazoan assignments
table(CPR$Assignment)##
## Class Family Genus Order Other Phylum Species
## 4 4 15 5 1 2 44
#Phyla and classes that were dominant (abundance)
CPR %>% select(Phylum, Class, Sum) %>% group_by(Phylum, Class) %>% summarize_if(is.numeric, sum, na.rm=TRUE) #broad overview## # A tibble: 13 × 3
## # Groups: Phylum [6]
## Phylum Class Sum
## <chr> <chr> <dbl>
## 1 Annelida Polychaeta 113
## 2 Arthropoda Copepoda 46478
## 3 Arthropoda Decapoda 19
## 4 Arthropoda Malacostraca 915
## 5 Arthropoda Ostracoda 60
## 6 Chaetognatha Sagittoidea 36
## 7 Chaetognatha dropped 184
## 8 Chordata Thaliacea 159
## 9 Chordata dropped 2
## 10 Cnidaria Hydrozoa 55
## 11 Cnidaria dropped 7
## 12 Mollusca Cephalopoda 1
## 13 Mollusca Gastropoda 253
#CPR dominanted by Arthropod-specifically copepod in terms of abundance
#how many individuals were assigned to species level?
sum(CPR$Sum[CPR$Assignment=="Species"])## [1] 28441
sum(CPR$Sum[CPR$Assignment=="Species"])/sum(CPR$Sum)## [1] 0.5890601
#Drop to species level
CPRspecies<-droplevels(subset(CPR, Assignment=="Species"))
#what species were dominant?
CPRspecies %>% select(Phylum, Class, Family, Species, Sum) %>% group_by(Phylum, Class, Family, Species) %>% summarize_if(is.numeric, sum, na.rm=TRUE) %>% print(n=32) #what species were dominant## # A tibble: 32 × 5
## # Groups: Phylum, Class, Family [20]
## Phylum Class Family Species Sum
## <chr> <chr> <chr> <chr> <dbl>
## 1 Annelida Polychaeta Lopadorrhynchidae Pelagobia longicirrata 53
## 2 Annelida Polychaeta Tomopteridae Tomopteris carpenteri 1
## 3 Arthropoda Copepoda Augaptilidae Haloptilus oxycephalus 41
## 4 Arthropoda Copepoda Calanidae Calanoides acutus 1049
## 5 Arthropoda Copepoda Calanidae Calanus simillimus 608
## 6 Arthropoda Copepoda Calanidae Neocalanus tonsus 2200
## 7 Arthropoda Copepoda Clausocalanidae Clausocalanus brevipes 3062
## 8 Arthropoda Copepoda Clausocalanidae Clausocalanus laticeps 399
## 9 Arthropoda Copepoda Euchaetidae Paraeuchaeta exigua 1
## 10 Arthropoda Copepoda Metridinidae Metridia lucens 754
## 11 Arthropoda Copepoda Metridinidae Pleuromamma borealis 24
## 12 Arthropoda Copepoda Metridinidae Pleuromamma piseki 6
## 13 Arthropoda Copepoda Metridinidae Pleuromamma robusta 3
## 14 Arthropoda Copepoda Oithonidae Oithona similis 18788
## 15 Arthropoda Copepoda Paracalanidae Mecynocera clausi 20
## 16 Arthropoda Copepoda Rhincalanidae Rhincalanus gigas 596
## 17 Arthropoda Copepoda Subeucalanidae Subeucalanus longiceps 3
## 18 Arthropoda Malacostraca Euphausiidae Euphausia frigida 4
## 19 Arthropoda Malacostraca Euphausiidae Euphausia spinifera 4
## 20 Arthropoda Malacostraca Euphausiidae Euphausia triacantha 8
## 21 Arthropoda Malacostraca Euphausiidae Euphausia vallentini 26
## 22 Arthropoda Malacostraca Euphausiidae Nematoscelis megalops 11
## 23 Arthropoda Malacostraca Euphausiidae Thysanoessa gregaria 70
## 24 Arthropoda Malacostraca Euphausiidae Thysanoessa macrura 512
## 25 Arthropoda Malacostraca Hyperiidae Themisto gaudichaudii 8
## 26 Arthropoda Malacostraca Phrosinidae Primno macropa 8
## 27 Chaetognatha Sagittoidea Eukrohniidae Eukrohnia hamata 32
## 28 Chaetognatha Sagittoidea Sagittidae Pseudosagitta gazellae 1
## 29 Chordata Thaliacea Salpidae Salpa thompsoni 145
## 30 Cnidaria Hydrozoa Solmundaeginidae Solmundella bitentaculata 2
## 31 Mollusca Gastropoda Clionidae Clione limacina antarctica 1
## 32 Mollusca Gastropoda Pneumodermatidae Spongiobranchaea australis 1
#arthopod again at species level, highest abundance and number of unique species
#what % of abundance was Arthropod
sum(CPRspecies$Sum[CPRspecies$Phylum=="Arthropoda"])/sum(CPRspecies$Sum)## [1] 0.9917021
#number of individuals?
sum(CPRspecies$Sum[CPRspecies$Phylum=="Arthropoda"])## [1] 28205
#oithona similis is dominant>by how much?
sum(CPRspecies$Sum[CPRspecies$Species=="Oithona similis"])/sum(CPRspecies$Sum)## [1] 0.6605956
4 Tidy workspace
ls()## [1] "cent" "contour.vals1" "CPR" "CPR_eDNAvals"
## [5] "CPR_eDNAvals2" "CPRspecies" "diversity" "eDNAdata"
## [9] "eDNAdata_wide" "eDNAmerged" "eDNAsamples" "eulerfill"
## [13] "extract.xyz" "fill12" "fill13" "hill"
## [17] "hill0" "hill1" "hill1lm" "hill2"
## [21] "hillplot" "hillsummstat" "indic" "Invasives"
## [25] "jaccard" "Lat5" "legend" "MetazoanFill"
## [29] "methodfill" "newsf" "nMDS" "p1"
## [33] "PA_matrix" "rawfill2" "samples" "samples_paired"
## [37] "scrs" "segs" "SeqDep1" "SeqDepth"
## [41] "spcs_merged" "spcs_wide" "species" "species_long"
## [45] "violin.plot" "violin.plot2" "wilcoxdat" "zonefill"
#remove all besides eDNAdata, eDNAdata_wide, fill12, samples, CPR_eDNAvals, CPRspecies, species_long
rm("Amphinema","Arach", "Aster", "Austrominius", "Bougainvillia", "BugFlab","Clytia", "Coryne", "Cryp", "eDNAsamples", "eDNAdata", "eDNAshape", "exc_sha", "hill12", "hill2", "hills", "hillsplot", "hillsummstat", "Invasives", "InvasivesCol", "InvasivesComparison", "InvasiveSummary", "InvasivesVolume", "InvasivesVolumeZone", "Lizzia", "long_raw", "LongInvasives", "matchedsamples", "Metacarcinus", "mysamples", "Mytilus", "Obelia", "Ostrea", "Patiriella", "plothill_0", "plothill_1", "plothill_2", "raweDNA", "rawfill", "rawsummary", "Richness", "species", "species12", "species2", "SpeciesFreq", "speciesmerged", "standard_error", "ttestinv", "venn_plot", "Venn12", "Venn2", "vennfill", "violin.plot", "Water")#CPR vs eDNA merged
5 CPR vs eDNA
5.1 Pre-match: Species beyond CPR limits (sea-ice)
#species detected past the sea-ice where CPR cannot be cast
seaiceeDNA<-droplevels(subset(species_long, Zone=="AAZ" | Zone=="SACCZ"))
seaiceeDNA<-droplevels(subset(seaiceeDNA, CPRSubset!="match"))
seaiceeDNA<-seaiceeDNA[!seaiceeDNA$Species %in% CPRspecies$Species,]
length(unique(seaiceeDNA$Species))## [1] 29
unique(seaiceeDNA$Species)## [1] "Ctenocalanus citer" "Calanus propinquus"
## [3] "Obelia dichotoma" "Calanus propinquus/similimus"
## [5] "Ostrea angasi" "Coryne eximia"
## [7] "Bugulina flabellata" "Stephos longipes"
## [9] "Asterias amurensis" "Limacina retroversa"
## [11] "Bougainvillia muscus" "Cryptosula pallasiana"
## [13] "Clione antarctica" "Krefftichthys anderssoni"
## [15] "Euphausia superba" "Vibilia antarctica"
## [17] "Watersipora subtorquata/subatra" "Arachnopusia unicornis"
## [19] "Clytia gracilis" "Halichondria panicea"
## [21] "Austrominius modestus" "Lizzia blondina"
## [23] "Metridia gerlachei" "Amphinema dinema"
## [25] "Polyeunoa laevis" "Sterechinus neumayeri"
## [27] "Harmothoe fuligineum" "Parborlasia corrugatus"
## [29] "Paralabidocera grandispina"
5.2 Format data
#sample data and paired samples
matchedsamples<-droplevels(subset(samples, CPRSubset=="match"))
eDNAsamples<-droplevels(subset(matchedsamples, Method=="eDNA"))
#subset eDNA data and remove empty rows
eDNAdata<-eDNAdata_wide %>% select(-Comment, -PercentID) %>% group_by(zOTU, Subkingdom, Phylum, Class, Order, Family, Genus, Species, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)
eDNAdata<-cbind.data.frame(eDNAdata[,1:9], eDNAdata[as.character(eDNAsamples$Sample)])
eDNAdata<-eDNAdata[rowSums(eDNAdata[,10:91])!=0,]
#12L and 2L
data12<-cbind.data.frame(eDNAdata[,1:9], eDNAdata[as.character(eDNAsamples$Sample[eDNAsamples$Type=="12L"])])
data12<-data12[rowSums(data12[,10:50])!=0,]
data2<-cbind.data.frame(eDNAdata[,1:9], eDNAdata[as.character(eDNAsamples$Sample[eDNAsamples$Type=="2L"])])
data2<-data2[rowSums(data2[,10:50])!=0,]
#add in sample data
eDNAlong<-eDNAdata %>% select(-zOTU) %>% group_by(Subkingdom, Phylum, Class, Order, Family, Genus, Species, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)
eDNAlong<-pivot_longer(eDNAlong, cols = starts_with("V"), names_to = "Sample", values_to = "Abundance")
eDNAlong<-eDNAlong[eDNAlong$Abundance!=0,]
eDNAlong<-merge(eDNAlong, eDNAsamples, all=TRUE)
#subset CPR data and remove empty rows
CPR<-as.data.frame(cbind(CPR[,c(1:8)], CPR[as.character(matchedsamples$Sample[matchedsamples$Method=="CPR"])]))
CPR<-CPR[rowSums(CPR[,9:49])!=0,]
#rename order "Amphipoda (Hyperiidea)"
CPR$Order<-recode_factor(CPR$Order, "Amphipoda (Hyperiidea)"="Amphipoda")
CPR<-CPR %>% group_by(Subkingdom, Phylum, Class, Order, Family, Genus, Species, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)
#long format CPR and combine with sample data
CPRlong<-pivot_longer(CPR, cols = 9:49, names_to = "Sample", values_to = "Abundance")
CPRlong<-CPRlong[CPRlong$Abundance!=0,]
CPRlong<-merge(matchedsamples, CPRlong)
#merge all
LongData<-merge(eDNAlong, CPRlong, all=TRUE)5.3 Diversity
5.3.1 Overview
#Zotu's 12L and 2L
length(unique(data12$zOTU))## [1] 469
length(unique(data2$zOTU))## [1] 151
#reads comparison
sum(eDNAlong$Abundance[eDNAlong$Type=="12L"])## [1] 682136
sum(eDNAlong$Abundance[eDNAlong$Type=="12L"])/sum(eDNAlong$Abundance)## [1] 0.9156434
sum(eDNAlong$Abundance[eDNAlong$Type=="2L"])## [1] 62844
sum(eDNAlong$Abundance[eDNAlong$Type=="2L"])/sum(eDNAlong$Abundance)## [1] 0.08435663
rm(eDNAdata_wide)
#abundance of CPR
sum(CPRlong$Abundance)## [1] 5190
#Assignments: number of zotus assigned to taxonomic levels
table(data12$Assignment)##
## Family Genus Order Phyla Species
## 46 40 6 2 375
(375/469)*100 #79.96% of zotus to species level## [1] 79.95736
table(data2$Assignment)##
## Class Family Genus Order Phyla Species
## 1 9 16 2 4 119
(119/151)*100 #78.81% of zotus to species level## [1] 78.80795
#number of individuals assigned to species level for CPR
sum(CPRlong$Abundance[CPRlong$Assignment=="Species"])/sum(CPRlong$Abundance)## [1] 0.5884393
#Unique assignments
#remove zotu from eDNA first
data12<-data12 %>% select(-zOTU) %>% group_by(Subkingdom, Phylum, Class, Order, Family, Genus, Species, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)
data2<-data2 %>% select(-zOTU) %>% group_by(Subkingdom, Phylum, Class, Order, Family, Genus, Species, Assignment) %>% summarize_if(is.numeric, sum, na.rm=TRUE)
table(data12$Assignment)##
## Family Genus Order Phyla Species
## 7 8 6 2 35
table(data2$Assignment)##
## Class Family Genus Order Phyla Species
## 1 5 7 2 4 29
table(CPR$Assignment)##
## Class Family Genus Order Phylum Species
## 3 2 11 4 1 18
#Phyla and classes that were dominant (abundance)
CPRlong %>% select(Phylum, Class, Abundance) %>% group_by(Phylum, Class) %>% summarize_if(is.numeric, sum, na.rm=TRUE) #broad overview## # A tibble: 10 × 3
## # Groups: Phylum [6]
## Phylum Class Abundance
## <chr> <chr> <dbl>
## 1 Annelida Polychaeta 12
## 2 Arthropoda Copepoda 4994
## 3 Arthropoda Decapoda 2
## 4 Arthropoda Malacostraca 93
## 5 Arthropoda Ostracoda 10
## 6 Chaetognatha Sagittoidea 9
## 7 Chaetognatha dropped 21
## 8 Chordata Thaliacea 9
## 9 Cnidaria Hydrozoa 13
## 10 Mollusca Gastropoda 27
#CPR again dominanted by Arthropod-specifically copepod in terms of abundance
#reads for eDNA
eDNAlong %>% select(Type, Phylum, Class, Abundance) %>% group_by(Type, Phylum, Class) %>% summarize_if(is.numeric, sum, na.rm=TRUE) %>% print(n=40)## # A tibble: 25 × 4
## # Groups: Type, Phylum [19]
## Type Phylum Class Abundance
## <chr> <chr> <chr> <dbl>
## 1 12L Arthropoda Branchiopoda 3
## 2 12L Arthropoda Copepoda 648225
## 3 12L Arthropoda Malacostraca 1246
## 4 12L Bryozoa Gymnolaemata 173
## 5 12L Chaetognatha Sagittoidea 272
## 6 12L Chordata Actinopteri 28438
## 7 12L Cnidaria Anthozoa 25
## 8 12L Cnidaria Hydrozoa 209
## 9 12L Ctenophora dropped 8
## 10 12L Echinodermata Asteroidea 3
## 11 12L Mollusca Gastropoda 3526
## 12 12L Porifera Demospongiae 1
## 13 12L Rotifera Eurotatoria 7
## 14 2L Annelida Polychaeta 23
## 15 2L Arthropoda Copepoda 57347
## 16 2L Arthropoda Malacostraca 438
## 17 2L Bryozoa Gymnolaemata 439
## 18 2L Chaetognatha Sagittoidea 366
## 19 2L Chordata Actinopteri 2560
## 20 2L Cnidaria Hydrozoa 642
## 21 2L Ctenophora dropped 76
## 22 2L Echinodermata Asteroidea 1
## 23 2L Mollusca Bivalvia 4
## 24 2L Mollusca Gastropoda 947
## 25 2L Mollusca dropped 1
#also dominated by copepod5.3.2 Species comparison
5.3.2.1 Data formatting
length(unique(data12$Species[data12$Assignment=="Species"]))## [1] 35
length(unique(data2$Species[data2$Assignment=="Species"]))## [1] 29
length(unique(CPR$Species[CPR$Assignment=="Species"]))## [1] 18
5.3.4 Heatmap
#format data
heatmap<-merge(CPRlong, eDNAlong, all=TRUE)
heatmap<-droplevels(subset(heatmap, Assignment=="Species")) #spcs only
heatmap<-heatmap %>% select(Sample, Latitude, Type, Phylum, Class, Order, Family, Genus, Species, Abundance) %>% group_by(Sample, Latitude, Type, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
heatmap$FOO[heatmap$Abundance>0]<-1 #FOO column
heatmap$shortspcs<-spCodes(heatmap$Species, nchar.gen = 1, nchar.sp = 20,nchar.ssp = 20, sep.spcode = " ") #short names for plotting## OK - no duplicated spcodes found.
heatmap$Lat<-format(round(as.numeric(heatmap$Latitude), 3), nsmall=3) #shorten latitude for plotting
#plot
ggplot(data=heatmap, mapping = aes(x=Lat, y=shortspcs, fill=FOO))+
geom_tile(colour="grey")+xlab(label="Sample")+
theme_bw()+
scale_fill_gradient(low="grey", high="blue")+
facet_grid(~Type, scales = "free_x", space = "free_x")+
theme(legend.position="none",strip.text.x = element_text(size=16, face="bold"),
axis.title.x = element_text(vjust = -3, size=12, face="bold"),
panel.background = element_rect(fill="grey"),
panel.grid.major = element_blank(), axis.ticks.x = element_line(linewidth=0.4),
axis.title.y = element_text(size=12, face="bold", colour = "black"),
axis.text.x = element_text(angle = 90, size=6, colour="black"),
axis.ticks.y = element_line(linewidth=0.4), plot.margin = margin(10,10,20,20),
axis.text.y = element_text(size=9, face="italic", colour="black"))+
labs(x="Paired eDNA and CPR samples (Latitude)", y="Species detected")#as table
with(heatmap, table(Species, Type, FOO))## , , FOO = 1
##
## Type
## Species 12L 2L CPR
## Arachnopusia unicornis 2 2 0
## Asterias amurensis 2 1 0
## Bougainvillia muscus 4 19 0
## Bugulina flabellata 10 20 0
## Calanoides acutus 26 17 21
## Calanus propinquus 22 15 0
## Calanus propinquus/similimus 32 31 0
## Calanus simillimus 0 0 20
## Clausocalanus brevipes 13 4 37
## Clausocalanus laticeps 31 19 23
## Clausocalanus pergens 4 2 0
## Corymorpha bigelowi 0 1 0
## Coryne eximia 27 31 0
## Ctenocalanus citer 40 39 0
## Electrona antarctica 2 0 0
## Eukrohnia hamata 5 6 6
## Euphausia vallentini 6 5 2
## Evadne nordmanni 1 0 0
## Gymnoscopelus bolini 1 0 0
## Gymnoscopelus fraseri 8 1 0
## Haloptilus oxycephalus 0 0 2
## Krefftichthys anderssoni 3 2 0
## Limacina retroversa 26 20 0
## Lizzia blondina 2 1 0
## Lucicutia flavicornis 1 0 0
## Mecynocera clausi 0 0 1
## Metridia gerlachei 1 0 0
## Metridia lucens 35 29 16
## Neocalanus tonsus 19 11 11
## Obelia dichotoma 5 5 0
## Oithona similis 41 36 41
## Ostrea angasi 0 3 0
## Paracalanus sp. B AC-2013 1 0 0
## Pelagobia longicirrata 0 0 4
## Platycephalus bassensis 1 0 0
## Rhincalanus gigas 23 11 22
## Salpa thompsoni 0 0 3
## Themisto gaudichaudii 2 0 1
## Thysanoessa gregaria 2 2 7
## Thysanoessa macrura 8 7 19
## Tomopteris carpenteri 0 0 1
## Vibilia antarctica 5 1 0
## Watersipora subtorquata/subatra 1 1 0
#CPR had more occurences of C. brevipes and T. macrura5.3.5 Richness across zones
#calculate hills numbers at q=0 (richness)
hill12<-cbind.data.frame(matchedsamples[matchedsamples$Type=="12L",], "hill0"=hill_div(PA12, qvalue = 0))
hill2<-cbind.data.frame(matchedsamples[matchedsamples$Type=="2L",], "hill0"=hill_div(PA2, qvalue = 0))
hillCPR<-cbind.data.frame(matchedsamples[matchedsamples$Type=="CPR",], "hill0"=hill_div(CPRPA, qvalue = 0))
#merge
hills<-merge(hill12, hill2, all=TRUE) %>% merge(hillCPR, all=TRUE)
#summary stats
hillsummstat<-summaryBy(hill0~Type*Zone,data=hills,FUN=function(x)
+ {c(mean=mean(x),sum=sum(x),se=sd(x)/sqrt(length(x)))})
hillsummstat## Type Zone hill0.mean hill0.sum hill0.se
## 1 12L AAZ 11.750000 188 0.4873397
## 2 12L PFZ 9.916667 119 0.4344682
## 3 12L SAZ 8.076923 105 0.7378202
## 4 2L AAZ 10.625000 170 0.5977388
## 5 2L PFZ 7.833333 94 0.4578165
## 6 2L SAZ 6.000000 78 0.6097498
## 7 CPR AAZ 6.583333 79 0.6210735
## 8 CPR PFZ 5.411765 92 0.6809316
## 9 CPR SAZ 5.500000 66 0.3588703
#order zones
hillsummstat$Zone<-factor(hillsummstat$Zone, levels = c("SAZ", "PFZ", "AAZ"))
#rename methods for figure
hillsummstat$Type<-recode_factor(hillsummstat$Type, "12L"="LargeVF-eDNA", "2L"="SmallVF-eDNA")
#plot richness
ggplot(hillsummstat,aes(x=Zone,y=hill0.mean,fill=Type))+
geom_bar(aes(x=Zone,y=hill0.mean), position = 'dodge',stat="identity") +
scale_fill_manual(values = CPR_eDNAvals2) +
geom_errorbar(aes(x=Zone, ymin= hill0.mean-hill0.se, ymax= hill0.mean+hill0.se), position=position_dodge(0.9),width=0.4) +
theme(axis.line = element_line(colour = "black", size = 0.2, linetype = "solid"),
axis.ticks = element_line(colour = "black", size = 0.2, linetype = "solid"),
axis.text.x = element_text(size=18,vjust = 0.5, hjust=0.5, colour="black"),
axis.text.y = element_text(size=18, vjust = 0.5, hjust=1,colour="black"),
axis.title.y = element_text(face="bold", size=16, vjust=3),
axis.title.x = element_text(face="bold",size=20,vjust = -2),
legend.text = element_text(size = 14), legend.title = element_text(size=14, face="bold"),
plot.margin = margin(20,10,20,20))+
scale_y_continuous(expand = c(0, 0), limits = c(0,12.3))+
scale_x_discrete(expand = c(0.25, 0))+
guides(fill=guide_legend(title="Method"))+
labs(x="SO zone", y="Mean species richness")#ggsave(filename = "CPR_eDNA_meanrichness.jpeg", plot = last_plot(), width = 18, height = 15, units = "cm", dpi = 600)5.3.6 Diversity across zones
#select data and convert abundance to PA
species_long<-droplevels(subset(LongData, Assignment=="Species")) %>% group_by(Type, Zone, Phylum, Class, Order, Family, Genus, Species) %>% summarise_if(is.numeric, sum, na.rm=TRUE)
species_long$FOO[species_long$Abundance>0]<-1
#order zones
species_long$Zone<-factor(species_long$Zone, levels = c("SAZ", "PFZ", "AAZ"))
ggplot(data=species_long, aes(fill=Phylum, y=FOO, x=Type)) +
geom_bar(stat="identity", width = 0.97) +
scale_fill_manual(values = fill12) +
theme(axis.line = element_line(colour = "black", size = 0.4, linetype = "solid"),
axis.ticks = element_line(colour = "black", size = 0.4, linetype = "solid"),
axis.text.x = element_text(size=10,vjust = 0.5, hjust=0.5, colour="black"),
axis.text.y = element_text(size=16, vjust = 0.5, hjust=1,colour="black"),
axis.title.y = element_text(face="bold", size=15, vjust=3),
strip.text.x = element_text(size = 12, face = "bold"),
axis.title.x = element_text(face="bold", size=13,vjust = -2),
legend.text = element_text(size = 9.5), legend.title = element_text(size=13, face="bold"),
legend.key.height = unit(0.2, 'cm'), legend.key.width = unit(0.3, 'cm'),
plot.margin = margin(20,20,30,20)) + scale_y_continuous(expand = c(0, 0))+
facet_grid(~Zone, switch = "x", scales = "free_x", space = "free_x")+
theme(panel.spacing = unit(0.75, "cm"))+
scale_x_discrete(labels=stringr::str_wrap(c("12L"="LargeVF", "2L"="SmallVF", "CPR"="CPR"),width=5), expand = c(0, 0))+
guides(fill=guide_legend(byrow = TRUE, title="Phylum"))+
labs(x="Detection methods across Southern Ocean zones", y="Species diversity")#ggsave(filename = "CPR_eDNA_diversityzones.jpeg", plot = last_plot(), width = 24, height = 14, units = "cm", dpi = 600)
#as table
with(species_long, table(Phylum, Zone, Type))## , , Type = 12L
##
## Zone
## Phylum SAZ PFZ AAZ
## Annelida 0 0 0
## Arthropoda 12 14 15
## Bryozoa 1 2 2
## Chaetognatha 1 1 1
## Chordata 2 3 2
## Cnidaria 4 3 3
## Echinodermata 1 0 0
## Mollusca 0 1 1
##
## , , Type = 2L
##
## Zone
## Phylum SAZ PFZ AAZ
## Annelida 0 0 0
## Arthropoda 9 11 12
## Bryozoa 3 1 2
## Chaetognatha 1 1 1
## Chordata 1 0 1
## Cnidaria 3 4 4
## Echinodermata 0 0 1
## Mollusca 1 2 1
##
## , , Type = CPR
##
## Zone
## Phylum SAZ PFZ AAZ
## Annelida 0 1 2
## Arthropoda 9 10 9
## Bryozoa 0 0 0
## Chaetognatha 1 1 1
## Chordata 0 1 1
## Cnidaria 0 0 0
## Echinodermata 0 0 0
## Mollusca 0 0 0
5.3.7 Indicator analyses
#12L
indic_12 <- multipatt(t(PA12), matchedsamples$Zone[matchedsamples$Type=="12L"], func = "IndVal.g", duleg = FALSE, control = how(nperm=9999))
summary(indic_12, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)##
## Multilevel pattern analysis
## ---------------------------
##
## Association function: IndVal.g
## Significance level (alpha): 0.05
##
## Total number of species: 35
## Selected number of species: 10
## Number of species associated to 1 group: 5
## Number of species associated to 2 groups: 5
##
## List of species associated to each combination:
##
## Group AAZ #sps. 1
## A B stat p.value
## Thysanoessa macrura 1.0 0.5 0.707 0.0006 ***
##
## Group SAZ #sps. 4
## A B stat p.value
## Neocalanus tonsus 0.6957 1.0000 0.834 0.0001 ***
## Gymnoscopelus fraseri 0.8660 0.5385 0.683 0.0011 **
## Clausocalanus brevipes 0.6347 0.6154 0.625 0.0433 *
## Clausocalanus pergens 1.0000 0.3077 0.555 0.0136 *
##
## Group AAZ+PFZ #sps. 5
## A B stat p.value
## Calanoides acutus 1.0000 0.9286 0.964 0.0001 ***
## Limacina retroversa 1.0000 0.9286 0.964 0.0001 ***
## Calanus propinquus/similimus 0.8667 1.0000 0.931 0.0001 ***
## Rhincalanus gigas 1.0000 0.8214 0.906 0.0001 ***
## Calanus propinquus 1.0000 0.7857 0.886 0.0001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#2L
indic_2 <- multipatt(t(PA2), matchedsamples$Zone[matchedsamples$Type=="2L"], func = "IndVal.g", duleg = FALSE, control = how(nperm=9999))
summary(indic_2, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)##
## Multilevel pattern analysis
## ---------------------------
##
## Association function: IndVal.g
## Significance level (alpha): 0.05
##
## Total number of species: 29
## Selected number of species: 10
## Number of species associated to 1 group: 5
## Number of species associated to 2 groups: 5
##
## List of species associated to each combination:
##
## Group AAZ #sps. 3
## A B stat p.value
## Calanoides acutus 0.7778 0.8750 0.825 0.0001 ***
## Calanus propinquus 0.7500 0.7500 0.750 0.0006 ***
## Thysanoessa macrura 1.0000 0.4375 0.661 0.0016 **
##
## Group SAZ #sps. 2
## A B stat p.value
## Neocalanus tonsus 0.8260 0.6923 0.756 0.0001 ***
## Clausocalanus brevipes 1.0000 0.3077 0.555 0.0120 *
##
## Group AAZ+PFZ #sps. 5
## A B stat p.value
## Calanus propinquus/similimus 0.8966 1.0000 0.947 0.0001 ***
## Metridia lucens 0.8893 0.9286 0.909 0.0001 ***
## Limacina retroversa 1.0000 0.7143 0.845 0.0001 ***
## Clausocalanus laticeps 0.8871 0.6071 0.734 0.0143 *
## Rhincalanus gigas 1.0000 0.3929 0.627 0.0153 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#CPR
indic_CPR <- multipatt(t(CPRPA), matchedsamples$Zone[matchedsamples$Type=="CPR"], func = "IndVal.g", duleg = FALSE, control = how(nperm=9999))
summary(indic_CPR, alpha=0.05, digits=3, too.many=100, indvalcomp=TRUE, sort=TRUE)##
## Multilevel pattern analysis
## ---------------------------
##
## Association function: IndVal.g
## Significance level (alpha): 0.05
##
## Total number of species: 18
## Selected number of species: 6
## Number of species associated to 1 group: 2
## Number of species associated to 2 groups: 4
##
## List of species associated to each combination:
##
## Group SAZ #sps. 2
## A B stat p.value
## Neocalanus tonsus 1.0000 0.9167 0.957 0.0001 ***
## Thysanoessa gregaria 1.0000 0.5833 0.764 0.0001 ***
##
## Group AAZ+PFZ #sps. 4
## A B stat p.value
## Calanus simillimus 1.0000 0.6897 0.830 0.0001 ***
## Rhincalanus gigas 0.9475 0.7241 0.828 0.0004 ***
## Thysanoessa macrura 1.0000 0.6552 0.809 0.0003 ***
## Calanoides acutus 0.9464 0.6897 0.808 0.0004 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
5.3.8 Format data for primer v7
#only need species assignment matrices
species12<-species12 %>% select(-Abundance)
species2<-species2 %>% select(-Abundance)
CPRspecies<-CPRspecies %>% select(-Abundance)
overallmatrix<-merge(species12, species2, all=TRUE) %>% merge(CPRspecies, all=TRUE)
#replace NAs with 0s
overallmatrix <- replace(overallmatrix, is.na(overallmatrix), 0)
#export
#write.xlsx(overallmatrix, "permanova_CPR.xlsx", sheetName = "PermanovaData", row.names = FALSE)
#add sample data
#write.xlsx(as.data.frame(matchedsamples), "permanova_CPR.xlsx", sheetName ="SampleData", append = TRUE, row.names = FALSE)