Introduction

These are codes I used to run NMDS on Count data collected by the BC DEPS at several benthic sites after 2009.

Load the package

Package vegan is the package in R that you can use to run multivariate analyses (Oksanen et al., 2022).

library(vegan)

Load data

Data can be found here.

library(readxl)
count_combined <- read_excel("F:/GitHub Projects/thesis_codes/Part b (09-19)/NMDS (Count) - B1, 7 ,10, REF 12-19/count_combined.xlsx", 
    sheet = "CHIRAE F")

Clean up data

Since the data set contains a lot of columns with zeros, it is essential to clean up all zeros to have a clean data set.

count_nozero <- count_combined[, colSums(count_combined != 0) > 0]

#Another way is to use dplyr 

library(dplyr)
count_nozero_dplyr <- count_combined %>% select(where(~ any(. != 0))) #this will yield the same file

#Write out a non-zero file to use for later. I will use PC-ORD v9 to cross check NMDS outputs
library("writexl")

write_xlsx(count_nozero,"F:/One Drive - Towson U/Thesis stuff/Data Analysis/Part b (2009 - recent)/count_nozero.xlsx")

Make a matrix from count data

count_wo_su <- count_nozero[,4:ncol(count_nozero)] #exclude the first few columns that contains SU information 
log_count_wo_su <- log(count_wo_su+1) #log transform count data to narrow down the influence of abundant taxa

Run NMDS

set.seed(1)
nmds_count <- metaMDS(log_count_wo_su,distance="bray",maxit=999,trymax = 500,wascores = T,k=3,autotransform = F) #gotta put autotransform on FALSE so R won't transform your data when you don't want it to.

Add back SU columns

library(dplyr) #load the package
data.scores <- as.data.frame(scores(nmds_count)$sites) #extract NMDS scores
data.scores.dplyr <- data.scores %>% mutate(Site = count_nozero$Site,Year=count_nozero$Year,
                                            Subwatershed=count_nozero$Subwatershed)

Plot the graph using ggplot2

library(ggplot2) #load the package

count_part2_graph <- ggplot(data.scores.dplyr, aes(x = NMDS1, y = NMDS3)) + 
    geom_point(size = 4,aes( shape = as.factor(Year), fill = Subwatershed,color=Subwatershed))+ # geom_polygon(data=data.scores.dplyr,aes(x=NMDS1,y=NMDS2,group=Subwatershed),alpha=0.30); this is to add a little convex polygon to visualize the clusters better. You can try to see how it looks. 
    scale_shape_manual(values = c(11,21,22,23,24,25)) +
    theme(axis.text.y = element_text(colour = "black", size = 25, face = "bold"), 
    axis.text.x = element_text(colour = "black", face = "bold", size = 25), 
    legend.text = element_text(size = 22, face ="bold", colour ="black"), 
    legend.position = "right", axis.title.y = element_text(face = "bold", size = 18), 
    axis.title.x = element_text(face = "bold", size = 25, colour = "black"), 
    axis.title.y.left = element_text(face = "bold", size = 25, colour = "black"),
    legend.title = element_text(size = 20, colour = "black", face = "bold"), 
    panel.background = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1.2),
    legend.key=element_blank(),
    plot.title = element_text(color = "black", size = 30, face = "bold", hjust = 0.5)) + 
    labs(
    title = "NMDS graph of Part B Ln(x+1) Count") + 
    theme(axis.title.x = element_text(margin=margin(t=10)), #add margin to x-axis title
        axis.title.y = element_text(margin=margin(r=10)))+
    labs(x = "NMDS1", fill = "Sub-watershed",color="Sub-watershed", y = "NMDS3", shape = "Year")  + 
    scale_fill_manual(values = c("#b10026", "#feb14c","#ffeea1","#fc4d2a")) +  
    scale_colour_manual(values = c("#b10026", "#feb14c","#ffeea1","#fc4d2a")) +
    geom_text(aes(label=Site),hjust=0.4, vjust=1.5,size=10) + 
    geom_hline(yintercept=0, linetype="dashed", 
                color = "black", size=1) + #add horizontal and vertical lines at 0
    geom_vline(xintercept=0, linetype="dashed", 
                color = "black", size=1)

count_part2_graph

ggsave("Part 2 ln x+1 Count NMDS (B1, 7, 10, REF - only CHIRAE).jpeg",count_part2_graph,width=25,height=13)

Plot a Screeplot

library(goeveg) #load package goeveg

dimcheckMDS(count_wo_su,distance = "bray",k=10,trymax=500,autotransform = F)