These are codes I used to run NMDS on Count data collected by the BC DEPS at all benthic sites after 2009. Please note that I added all sites sampled by the BC DEPS. Some sites were sampled every year or every other year. Some sites were not sampled as often. The purpose of this analysis was to determine whether spatially close sites in the same sub-watershed would be also close on the ordination graph.
Package vegan is the package in R that you can use to run multivariate analyses (Oksanen et al., 2022).
library(vegan)
Data can be found here.
library(readxl)
count_combined_allsites <- read_excel("F:/GitHub Projects/thesis_codes/Part b (09-19)/NMDS (Count) - all sites - spatial question/count_combined_allsites.xlsx",
sheet = "CHIRAE F OLIGTA O")
Since the data set contains a lot of columns with zeros, it is essential to clean up all zeros to have a clean data set.
count_nozero <- count_combined_allsites[, colSums(count_combined_allsites != 0) > 0]
#Another way is to use dplyr
library(dplyr)
count_nozero_dplyr <- count_combined_allsites %>% select(where(~ any(. != 0))) #this will yield the same file
#Write out a non-zero file to use for later. I will use PC-ORD v9 to cross check NMDS outputs
library("writexl")
write_xlsx(count_nozero,"F:/One Drive - Towson U/Thesis stuff/Data Analysis/Part b (2009 - recent)/count_nozero_allsites.xlsx")
library(funrar)
count_wo_su <- count_nozero[,6:ncol(count_nozero)] #exclude the first few columns that contains SU information
rel_count_wo_su <- make_relative(as.matrix(count_nozero[,6:ncol(count_nozero)]))
set.seed(1)
nmds_count <- metaMDS(rel_count_wo_su,distance="bray",maxit=999,trymax = 500,wascores = T,k=3,autotransform = F) #gotta put autotransform on FALSE so R won't transform your data when you don't want it to.
library(dplyr) #load the package
data.scores <- as.data.frame(scores(nmds_count)$sites) #extract NMDS scores for sites
data.scores.dplyr <- data.scores %>% mutate(Site = count_nozero$Site,Year=count_nozero$Year,
Subwatershed=count_nozero$Subwatershed, Period = count_nozero$Period)
taxon.scores <- as.data.frame(scores(nmds_count,display=c("species")))
library("writexl")
write_xlsx(data.scores.dplyr,"F:/GitHub Projects/thesis_codes/Part b (09-19)/NMDS (Count) - all sites - spatial question/NMDS all sites count part b scores.xlsx")
library(ggplot2) #load the package
data.scores.dplyr$Subwatershed = factor(data.scores.dplyr$Subwatershed,levels=c("B9 (2%)","B7 (12%)",
"B9A (16%)","B1 (21%)"))
count_allsite_part2_graph <- ggplot(data.scores.dplyr, aes(x = NMDS1, y = NMDS2)) +
geom_point(size = 4,stroke =2, aes( shape = as.factor(Year), fill = Subwatershed,color=Subwatershed))+ # geom_polygon(data=data.scores.dplyr,aes(x=NMDS1,y=NMDS2,group=Subwatershed),alpha=0.30); this is to add a little convex polygon to visualize the clusters better. You can try to see how it looks.
scale_shape_manual(values = c(0,1,2,5,6,7,21,22,23,24,25)) +
theme(axis.text.y = element_text(colour = "black", size = 25, face = "bold"),
axis.text.x = element_text(colour = "black", face = "bold", size = 25),
legend.text = element_text(size = 22, face ="bold", colour ="black"),
legend.position = "right", axis.title.y = element_text(face = "bold", size = 18),
axis.title.x = element_text(face = "bold", size = 25, colour = "black"),
axis.title.y.left = element_text(face = "bold", size = 25, colour = "black"),
legend.title = element_text(size = 20, colour = "black", face = "bold"),
panel.background = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1.2),
legend.key=element_blank(),
plot.title = element_text(color = "black", size = 30, face = "bold", hjust = 0.5)) +
labs(
title = "NMDS graph of Part B Relative Count for all sites") +
theme(axis.title.x = element_text(margin=margin(t=10)), #add margin to x-axis title
axis.title.y = element_text(margin=margin(r=10)))+
labs(x = "NMDS1", fill = "Sub-watershed",color="Sub-watershed", y = "NMDS2", shape = "Year") +
scale_fill_manual(values = c( "#ffeea1","#feb14c","#fc4d2a","#b10026")) +
scale_colour_manual(values = c( "#ffeea1","#feb14c","#fc4d2a","#b10026")) +
geom_text(aes(label=Site),hjust=0.4, vjust=1.5,size=10) +
geom_hline(yintercept=0, linetype="dashed",
color = "black", size=1) + #add horizontal and vertical lines at 0
geom_vline(xintercept=0, linetype="dashed",
color = "black", size=1) #+
#scale_x_reverse() + scale_y_reverse()
count_allsite_part2_graph
ggsave("Part 2 Count NMDS for all Sites.jpeg",count_allsite_part2_graph,width=25,height=13)
library(ggplot2) #load the package
data.scores.dplyr$Subwatershed = factor(data.scores.dplyr$Subwatershed,levels=c("B9 (2%)","B7 (12%)",
"B9A (16%)","B1 (21%)"))
data.scores.dplyr$Period = factor(data.scores.dplyr$Period,levels=c("2009-2015","2016-2019"))
count_allsite_part2_graph_simplified <- ggplot(data.scores.dplyr, aes(x = NMDS1, y = NMDS2)) +
geom_point(size = 6,stroke =2, aes( shape = Period, fill = Subwatershed, color= Subwatershed))+ # geom_polygon(data=data.scores.dplyr,aes(x=NMDS1,y=NMDS2,group=Subwatershed),alpha=0.30); this is to add a little convex polygon to visualize the clusters better. You can try to see how it looks.
scale_shape_manual(values = c(22,24)) +
scale_fill_manual(values = c( "#ffeea1","#feb14c","#fc4d2a","#b10026")) +
scale_colour_manual(values = c( "#ffeea1","#feb14c","#fc4d2a","#b10026")) +
theme(axis.text.y = element_text(colour = "black", size = 25, face = "bold"),
axis.text.x = element_text(colour = "black", face = "bold", size = 25),
legend.text = element_text(size = 22, face ="bold", colour ="black"),
legend.position = "right", axis.title.y = element_text(face = "bold", size = 18),
axis.title.x = element_text(face = "bold", size = 25, colour = "black"),
axis.title.y.left = element_text(face = "bold", size = 25, colour = "black"),
legend.title = element_text(size = 20, colour = "black", face = "bold"),
panel.background = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1.2),
legend.key=element_blank(),
plot.title = element_text(color = "black", size = 30, face = "bold", hjust = 0.5)) +
labs(
title = "NMDS graph of Part B Relative Count for all sites") +
theme(axis.title.x = element_text(margin=margin(t=10)), #add margin to x-axis title
axis.title.y = element_text(margin=margin(r=10)))+
labs(x = "NMDS1", fill = "Subwatershed", y = "NMDS2", shape = "Period") +
#scale_colour_manual(values = c( "#ffeea1","#feb14c","#fc4d2a","#b10026")) +
geom_text(aes(label=Site),hjust=0.4, vjust=1.5,size=10) +
geom_hline(yintercept=0, linetype="dashed",
color = "black", size=1) + #add horizontal and vertical lines at 0
geom_vline(xintercept=0, linetype="dashed",
color = "black", size=1) #+
#scale_x_reverse() + scale_y_reverse()
count_allsite_part2_graph_simplified
ggsave("Part 2 Count NMDS for all Sites - Few symbols.jpeg",count_allsite_part2_graph_simplified,width=25,height=13)
library(goeveg) #load package goeveg
dimcheckMDS(count_wo_su,distance = "bray",k=10,trymax=500,autotransform = F)
The purpose is to visualize which benthic taxa are driving the differences in assemblage composition.
all_taxa_biplot_part2_allsites <- count_allsite_part2_graph +
geom_segment(data=taxon.scores, aes(x=0, xend=NMDS1, y=0, yend=NMDS2),
color="black", arrow=arrow(length=unit(0.01,"npc"))) + #add arrow
geom_text(data=taxon.scores,
aes(x=NMDS1,y=NMDS2,label=rownames(taxon.scores),
hjust=0.5*(1-sign(NMDS1)),vjust=0.5*(1-sign(NMDS2))),
color="black", size=4,fontface="bold")
ggsave("Biplot Count All Taxa NMDS All Sites.jpeg", all_taxa_biplot_part2_allsites,width=25,height=13)
#previous step was to screen which taxa could be included in the biplot
Biplot of selected taxa is below:
count_part2_allsites_graph_biplot <- count_allsite_part2_graph + #sensitive taxa come first; bold
geom_segment(data=taxon.scores[c('LEUCTA','LEUCRA','BAETIS','WORMIA','ACROIA',
'RHYALA','AMPHRA','STYLUS','MACCUM','DOLOES',
'ISONIA','DIPLNA','GLOSMA', 'NIGRIA','EPHERA',
'ECTOIA','EURYLA'),], aes(x=0, xend=NMDS1, y=0, yend=NMDS2),
color="black", arrow=arrow(length=unit(0.01,"npc"))) + #add arrow
geom_text(data=taxon.scores[c('LEUCTA','LEUCRA','BAETIS','WORMIA','ACROIA',
'RHYALA','AMPHRA','STYLUS','MACCUM','DOLOES',
'ISONIA','DIPLNA','GLOSMA','NIGRIA','EPHERA',
'ECTOIA','EURYLA'),],
aes(x=NMDS1,y=NMDS2,label=rownames(taxon.scores[c('LEUCTA','LEUCRA','BAETIS','WORMIA','ACROIA',
'RHYALA','AMPHRA','STYLUS','MACCUM','DOLOES',
'ISONIA','DIPLNA','GLOSMA','NIGRIA','EPHERA',
'ECTOIA','EURYLA'),]),
hjust=0.5*(1-sign(NMDS1)),vjust=0.5*(1-sign(NMDS2))),
color="black", size=6,fontface="bold") +
#add text to arrow; now comes tolerant taxa, italicized
geom_segment(data=taxon.scores[c('MACRUS','CHELRA','OLIGTA','FERRIA','CHEUHE',
'ANTOHA','LIMNRA','HYDRHE',
'CLINRA','HEMEIA','CALOYX'),], aes(x=0, xend=NMDS1, y=0, yend=NMDS2),
color="black", arrow=arrow(length=unit(0.01,"npc"))) + #add arrow
geom_text(data=taxon.scores[c('MACRUS','CHELRA','OLIGTA','FERRIA','CHEUHE','ANTOHA','LIMNRA','HYDRHE',
'CLINRA','HEMEIA','CALOYX'),],
aes(x=NMDS1,y=NMDS2,label=rownames(taxon.scores[c('MACRUS','CHELRA','OLIGTA','FERRIA','CHEUHE',
'ANTOHA','LIMNRA','HYDRHE','CLINRA','HEMEIA',
'CALOYX'),]),
hjust=0.5*(1-sign(NMDS1)),vjust=0.5*(1-sign(NMDS2))),
color="black", size=6,fontface="italic")
count_part2_allsites_graph_biplot
ggsave("Count Chapter 2 Biplot All Sites.jpeg", count_part2_allsites_graph_biplot,width=25,height=13)