Introduction

These are codes I used to run NMDS on Count data collected by the BC DEPS at all benthic sites after 2009. Please note that I added all sites sampled by the BC DEPS. Some sites were sampled every year or every other year. Some sites were not sampled as often. The purpose of this analysis was to determine whether spatially close sites in the same sub-watershed would be also close on the ordination graph.

Load the package

Package vegan is the package in R that you can use to run multivariate analyses (Oksanen et al., 2022).

library(vegan)

Load data

Data can be found here.

library(readxl)
count_combined_allsites <- read_excel("F:/GitHub Projects/thesis_codes/Part b (09-19)/NMDS (Count) - all sites - spatial question/count_combined_allsites.xlsx", 
    sheet = "CHIRAE F OLIGTA O")

Clean up data

Since the data set contains a lot of columns with zeros, it is essential to clean up all zeros to have a clean data set.

count_nozero <- count_combined_allsites[, colSums(count_combined_allsites != 0) > 0]

#Another way is to use dplyr 

library(dplyr)
count_nozero_dplyr <- count_combined_allsites %>% select(where(~ any(. != 0))) #this will yield the same file

#Write out a non-zero file to use for later. I will use PC-ORD v9 to cross check NMDS outputs
library("writexl")

write_xlsx(count_nozero,"F:/One Drive - Towson U/Thesis stuff/Data Analysis/Part b (2009 - recent)/count_nozero_allsites.xlsx")

Make a matrix from count data

library(funrar)
count_wo_su <- count_nozero[,6:ncol(count_nozero)] #exclude the first few columns that contains SU information
rel_count_wo_su <- make_relative(as.matrix(count_nozero[,6:ncol(count_nozero)]))

Run NMDS

set.seed(1)
nmds_count <- metaMDS(rel_count_wo_su,distance="bray",maxit=999,trymax = 500,wascores = T,k=3,autotransform = F) #gotta put autotransform on FALSE so R won't transform your data when you don't want it to.

Add back SU columns

library(dplyr) #load the package
data.scores <- as.data.frame(scores(nmds_count)$sites) #extract NMDS scores for sites
data.scores.dplyr <- data.scores %>% mutate(Site = count_nozero$Site,Year=count_nozero$Year,
                                            Subwatershed=count_nozero$Subwatershed, Period = count_nozero$Period)

taxon.scores <- as.data.frame(scores(nmds_count,display=c("species")))

library("writexl")

write_xlsx(data.scores.dplyr,"F:/GitHub Projects/thesis_codes/Part b (09-19)/NMDS (Count) - all sites - spatial question/NMDS all sites count part b scores.xlsx")

Plot the graph using ggplot2

library(ggplot2) #load the package

data.scores.dplyr$Subwatershed = factor(data.scores.dplyr$Subwatershed,levels=c("B9 (2%)","B7 (12%)",
                                                                                "B9A (16%)","B1 (21%)"))

count_allsite_part2_graph <- ggplot(data.scores.dplyr, aes(x = NMDS1, y = NMDS2)) + 
    geom_point(size = 4,stroke =2, aes( shape = as.factor(Year), fill = Subwatershed,color=Subwatershed))+ # geom_polygon(data=data.scores.dplyr,aes(x=NMDS1,y=NMDS2,group=Subwatershed),alpha=0.30); this is to add a little convex polygon to visualize the clusters better. You can try to see how it looks. 
    scale_shape_manual(values = c(0,1,2,5,6,7,21,22,23,24,25)) +
    theme(axis.text.y = element_text(colour = "black", size = 25, face = "bold"), 
    axis.text.x = element_text(colour = "black", face = "bold", size = 25), 
    legend.text = element_text(size = 22, face ="bold", colour ="black"), 
    legend.position = "right", axis.title.y = element_text(face = "bold", size = 18), 
    axis.title.x = element_text(face = "bold", size = 25, colour = "black"), 
    axis.title.y.left = element_text(face = "bold", size = 25, colour = "black"),
    legend.title = element_text(size = 20, colour = "black", face = "bold"), 
    panel.background = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1.2),
    legend.key=element_blank(),
    plot.title = element_text(color = "black", size = 30, face = "bold", hjust = 0.5)) + 
    labs(
    title = "NMDS graph of Part B Relative Count for all sites") + 
    theme(axis.title.x = element_text(margin=margin(t=10)), #add margin to x-axis title
        axis.title.y = element_text(margin=margin(r=10)))+
    labs(x = "NMDS1", fill = "Sub-watershed",color="Sub-watershed", y = "NMDS2", shape = "Year")  + 
    scale_fill_manual(values = c( "#ffeea1","#feb14c","#fc4d2a","#b10026")) +  
    scale_colour_manual(values = c( "#ffeea1","#feb14c","#fc4d2a","#b10026")) +
    geom_text(aes(label=Site),hjust=0.4, vjust=1.5,size=10) + 
    geom_hline(yintercept=0, linetype="dashed", 
                color = "black", size=1) + #add horizontal and vertical lines at 0
    geom_vline(xintercept=0, linetype="dashed", 
                color = "black", size=1) #+
    #scale_x_reverse() + scale_y_reverse()

count_allsite_part2_graph

ggsave("Part 2 Count NMDS for all Sites.jpeg",count_allsite_part2_graph,width=25,height=13)

Plot an NMDS graph (symbology modified to make it easier to visualize changes)

library(ggplot2) #load the package

data.scores.dplyr$Subwatershed = factor(data.scores.dplyr$Subwatershed,levels=c("B9 (2%)","B7 (12%)",
                                                                                "B9A (16%)","B1 (21%)"))
data.scores.dplyr$Period = factor(data.scores.dplyr$Period,levels=c("2009-2015","2016-2019"))

count_allsite_part2_graph_simplified <- ggplot(data.scores.dplyr, aes(x = NMDS1, y = NMDS2)) + 
    geom_point(size = 6,stroke =2, aes( shape = Period, fill = Subwatershed, color= Subwatershed))+ # geom_polygon(data=data.scores.dplyr,aes(x=NMDS1,y=NMDS2,group=Subwatershed),alpha=0.30); this is to add a little convex polygon to visualize the clusters better. You can try to see how it looks. 
    scale_shape_manual(values = c(22,24)) +
    scale_fill_manual(values = c( "#ffeea1","#feb14c","#fc4d2a","#b10026")) + 
    scale_colour_manual(values = c( "#ffeea1","#feb14c","#fc4d2a","#b10026")) +
    theme(axis.text.y = element_text(colour = "black", size = 25, face = "bold"), 
    axis.text.x = element_text(colour = "black", face = "bold", size = 25), 
    legend.text = element_text(size = 22, face ="bold", colour ="black"), 
    legend.position = "right", axis.title.y = element_text(face = "bold", size = 18), 
    axis.title.x = element_text(face = "bold", size = 25, colour = "black"), 
    axis.title.y.left = element_text(face = "bold", size = 25, colour = "black"),
    legend.title = element_text(size = 20, colour = "black", face = "bold"), 
    panel.background = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1.2),
    legend.key=element_blank(),
    plot.title = element_text(color = "black", size = 30, face = "bold", hjust = 0.5)) + 
    labs(
    title = "NMDS graph of Part B Relative Count for all sites") + 
    theme(axis.title.x = element_text(margin=margin(t=10)), #add margin to x-axis title
        axis.title.y = element_text(margin=margin(r=10)))+
    labs(x = "NMDS1", fill = "Subwatershed", y = "NMDS2", shape = "Period")  + 
    #scale_colour_manual(values = c( "#ffeea1","#feb14c","#fc4d2a","#b10026")) +
    geom_text(aes(label=Site),hjust=0.4, vjust=1.5,size=10) + 
    geom_hline(yintercept=0, linetype="dashed", 
                color = "black", size=1) + #add horizontal and vertical lines at 0
    geom_vline(xintercept=0, linetype="dashed", 
                color = "black", size=1) #+
    #scale_x_reverse() + scale_y_reverse()

count_allsite_part2_graph_simplified

ggsave("Part 2 Count NMDS for all Sites - Few symbols.jpeg",count_allsite_part2_graph_simplified,width=25,height=13)

Plot a Screeplot

library(goeveg) #load package goeveg

dimcheckMDS(count_wo_su,distance = "bray",k=10,trymax=500,autotransform = F)

Plot a biplot

The purpose is to visualize which benthic taxa are driving the differences in assemblage composition.

all_taxa_biplot_part2_allsites <- count_allsite_part2_graph + 
  geom_segment(data=taxon.scores, aes(x=0, xend=NMDS1, y=0, yend=NMDS2), 
               color="black", arrow=arrow(length=unit(0.01,"npc"))) + #add arrow
  geom_text(data=taxon.scores, 
            aes(x=NMDS1,y=NMDS2,label=rownames(taxon.scores),
                hjust=0.5*(1-sign(NMDS1)),vjust=0.5*(1-sign(NMDS2))), 
            color="black", size=4,fontface="bold")

ggsave("Biplot Count All Taxa NMDS All Sites.jpeg", all_taxa_biplot_part2_allsites,width=25,height=13)

#previous step was to screen which taxa could be included in the biplot

Biplot of selected taxa is below:

count_part2_allsites_graph_biplot <- count_allsite_part2_graph + #sensitive taxa come first; bold
  geom_segment(data=taxon.scores[c('LEUCTA','LEUCRA','BAETIS','WORMIA','ACROIA',
                                   'RHYALA','AMPHRA','STYLUS','MACCUM','DOLOES',
                                   'ISONIA','DIPLNA','GLOSMA', 'NIGRIA','EPHERA',
                                   'ECTOIA','EURYLA'),], aes(x=0, xend=NMDS1, y=0, yend=NMDS2), 
               color="black", arrow=arrow(length=unit(0.01,"npc"))) + #add arrow
  geom_text(data=taxon.scores[c('LEUCTA','LEUCRA','BAETIS','WORMIA','ACROIA',
                                   'RHYALA','AMPHRA','STYLUS','MACCUM','DOLOES',
                                   'ISONIA','DIPLNA','GLOSMA','NIGRIA','EPHERA',
                                'ECTOIA','EURYLA'),], 
            aes(x=NMDS1,y=NMDS2,label=rownames(taxon.scores[c('LEUCTA','LEUCRA','BAETIS','WORMIA','ACROIA',
                                   'RHYALA','AMPHRA','STYLUS','MACCUM','DOLOES',
                                   'ISONIA','DIPLNA','GLOSMA','NIGRIA','EPHERA',
                                   'ECTOIA','EURYLA'),]),
                hjust=0.5*(1-sign(NMDS1)),vjust=0.5*(1-sign(NMDS2))), 
            color="black", size=6,fontface="bold") +
  #add text to arrow; now comes tolerant taxa, italicized
  geom_segment(data=taxon.scores[c('MACRUS','CHELRA','OLIGTA','FERRIA','CHEUHE',
                                   'ANTOHA','LIMNRA','HYDRHE',
                                   'CLINRA','HEMEIA','CALOYX'),], aes(x=0, xend=NMDS1, y=0, yend=NMDS2), 
               color="black", arrow=arrow(length=unit(0.01,"npc"))) + #add arrow
  geom_text(data=taxon.scores[c('MACRUS','CHELRA','OLIGTA','FERRIA','CHEUHE','ANTOHA','LIMNRA','HYDRHE',
                                   'CLINRA','HEMEIA','CALOYX'),], 
            aes(x=NMDS1,y=NMDS2,label=rownames(taxon.scores[c('MACRUS','CHELRA','OLIGTA','FERRIA','CHEUHE',
                                                              'ANTOHA','LIMNRA','HYDRHE','CLINRA','HEMEIA',
                                                              'CALOYX'),]),
                hjust=0.5*(1-sign(NMDS1)),vjust=0.5*(1-sign(NMDS2))), 
            color="black", size=6,fontface="italic")

count_part2_allsites_graph_biplot

ggsave("Count Chapter 2 Biplot All Sites.jpeg", count_part2_allsites_graph_biplot,width=25,height=13)

NMDS (Count) - SPATIALLY EXAMINED

Frank

2023-02-13