The purpose of this code is to Run NMDS ordinations for each metabarcoding marker to determine which species are influential and may be interesting to include in mock community analysis. This file is for the oblique tows. There is another file for the vertical tows. I decided to remove stations that were not strictly WOAC stations. Replicates were also combined so that any species that was found in either of the replicates was counted. NMDS plots are made using presence-absence data due to concerns about amplification bias. For taxon-specific markers, I decided to subset the data to that specific taxon.

load packages

library(stringr)
library(vegan)
library(EcolUtils)
library(ggplot2)
library(ggrepel)
library(dplyr)
library(reshape2)

Import Data

Import ASV count tables - I downloaded the tsv and csv files from google drive into the data directory

files_to_read <- list.files(path = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/data/",
                            pattern = "\\.tsv$", full.names = TRUE)

# Read each file and assign to a variable named after the file
for (file in files_to_read) {
  # Read the file into a data frame
  df <- read.table(file = file, sep = '\t', header = TRUE)
  
  # Rename the column "x" to "ASV" if it exists
  if ("x" %in% colnames(df)) {
    colnames(df)[colnames(df) == "x"] <- "ASV"
  }
  
  # Create a variable name from the filename
  # Remove the directory path and file extension, replace spaces with underscores
  var_name <- gsub("\\.tsv$", "", basename(file))
  var_name <- gsub(" ", "_", var_name)
  
  # Assign the data frame to a variable with the created name
  assign(var_name, df)
}

Import ASV to taxonomy tables

files_to_read <- list.files(path = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/data/",
                            pattern = "\\.txt$", full.names = TRUE)

# Read each file and assign to a variable named after the file
for (file in files_to_read) {
  # Read the file into a data frame
  df <- read.table(file = file, sep = '\t', header = TRUE)
  
  # Create a variable name from the filename
  # Remove the directory path and file extension, replace spaces with underscores
  var_name <- gsub("\\.txt$", "", basename(file))
  var_name <- gsub(" ", "_", var_name)
  
  # Assign the data frame to a variable with the created name
  assign(var_name, df)
}

add taxa ids to count data

M28S_counts <- merge(RV_28S_asvTaxonomyTable, marker_28S_ASVs_counts, by = "ASV", all = TRUE)

COI_counts <- merge(RV_COI_asvTaxonomyTable, COI_ASVs_counts, by = "ASV", all = TRUE)

Cop16S_counts <- merge(RV_Cop16S_asvTaxonomyTable, Cop16S_ASVs_counts, by = "ASV", all = TRUE)

Fishcytb_counts <- merge(RV_Fishcytb_asvTaxonomyTable, Fishcytb_ASVs_counts, by = "ASV", all = TRUE)

Machida18S_counts <- merge(RV_Machida18S_asvTaxonomyTable, Machida18S_ASVs_counts, by = "ASV", all = TRUE)

Mol16S_counts <- merge(RV_Mol16S_asvTaxonomyTable, Mol16S_ASVs_counts, by = "ASV", all = TRUE)

COI

Data Setup

Remove non-zooplanton phyla

unique(COI_counts$Phylum)
##  [1] "Arthropoda"                 "Bacillariophyta"           
##  [3] "Bryozoa"                    "Mollusca"                  
##  [5] "Rhodophyta"                 NA                          
##  [7] "Echinodermata"              "Annelida"                  
##  [9] "Cnidaria"                   "Platyhelminthes"           
## [11] "Oomycota"                   "Chordata"                  
## [13] "Chaetognatha"               "Nemertea"                  
## [15] "Ctenophora"                 "Ascomycota"                
## [17] "Onychophora"                "Cryptophyceae__p"          
## [19] "Zoopagomycota"              "Rotifera"                  
## [21] "Phaeophyceae__p"            "Haptophyta"                
## [23] "Basidiomycota"              "Chlorophyta"               
## [25] "Dinophyceae__p"             "Phoronida"                 
## [27] "Discosea"                   "Porifera"                  
## [29] "Tubulinea"                  "Dictyochophyceae__p"       
## [31] "Bacteroidota"               "Blastocladiomycota"        
## [33] "Pseudomonadota"             "Chrysophyceae__p"          
## [35] "Nematoda"                   "Malawimonadida__c__p"      
## [37] "Prasinodermophyta"          "Chrysoparadoxa__f__o__c__p"
## [39] "Apusomonadida__c__p"        "Streptophyta"              
## [41] "Pelagophyceae__p"           "Eustigmatophyceae__p"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p')

# Remove those levels and drop them from the factor
COI_counts <- droplevels(COI_counts[!COI_counts$Phylum %in% levels_to_remove,])

remove non-species rows

COI_comp<- COI_counts[complete.cases(COI_counts), ]

convert to presence-absence

COI_comp_pa<-COI_comp %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

remove excess columns

COI_comp_pa <- COI_comp_pa[ -c(1:7) ]

wide to long format

COI_long<-melt(COI_comp_pa, id.vars=c("Species"))

add up multiple lines per station and convert back to presence-absence

COI_long_sum <- COI_long %>%
  group_by(Species,variable) %>%
  summarise(
    value = sum(value))

COI_long_sum<-COI_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

split code string up to extract metadata

COI_long_sum$variable<-str_split_fixed(COI_long_sum$variable,"_",4)

remove samples of non-interest

unique(COI_long_sum$variable[,3])
##  [1] "P12"   "P22"   "P28"   "P38"   "P402"  "P4"    "P8"    "CA042" "P105" 
## [10] "P123"  "P132"  "P136"  "P381"  "P7"    ""
unique(COI_long_sum$variable[,2])
##  [1] "1804"     "1806"     "1807"     "1809"     "1904"     "1907"    
##  [7] "1909"     "2007"     "2009"     "2010"     "Control1" "Control2"

remove excess stations

levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
COI_long_sum <- droplevels(COI_long_sum[!COI_long_sum$variable[,3] %in% levels_to_remove,])

split up replicate columns

unique(COI_long_sum$variable[,4])
## [1] "VeA" "Ve"  "ObB" "ObA" "Ob"  "VeB"
COI_long_sum <- COI_long_sum %>%
  mutate(
    tow_type = substr(variable[,4], 1, 2),
    replicate = substr(variable[,4], 3, nchar(variable[,4]))
  )

split up year and month columns

COI_long_sum <- COI_long_sum %>%
  mutate(
    Year = substr(variable[,2], 1, 2),
    Month = substr(variable[,2], 3, 4)
  )

make station column

COI_long_sum$station<-COI_long_sum$variable[,3]

remove vertical samples

COI_long_sum<-subset(COI_long_sum,tow_type=="Ob")

merge replicates

colnames(COI_long_sum)
## [1] "Species"   "variable"  "value"     "tow_type"  "replicate" "Year"     
## [7] "Month"     "station"
COI_long_sum$sample<-paste(COI_long_sum$Year,COI_long_sum$Month,COI_long_sum$station,COI_long_sum$tow_type,sep="")

COI_long_sum <- COI_long_sum %>%
  group_by(Year,Month,station,sample,Species) %>%
  summarise(
    value = sum(value))
#convert back to presence-absence
COI_long_sum<-COI_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

back to wide format

COI_wide<-dcast(COI_long_sum, sample+Year+Month+station ~ Species )

remove any columns with all zeroes

target_cols <- 6:ncol(COI_wide)

non_zero_cols <- target_cols[colSums(COI_wide[, target_cols]) != 0]

COI_wide <- COI_wide[, c(1:5, non_zero_cols)]

remove environmental data

COI_wide_simple<-COI_wide[ , 5:231]

Make environmental columns categorical

# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")

# Convert the specified columns to factors
COI_wide <- COI_wide %>%
  mutate_at(vars(one_of(cols_to_factor)), as.factor)

PERMANOVA

dist<-vegdist(COI_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=COI_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist ~ station * Month, data = COI_wide, permutations = 999, method = "bray")
##               Df SumOfSqs      R2      F Pr(>F)    
## station        6   3.3012 0.35816 5.3775  0.001 ***
## Month          3   1.1019 0.11955 3.5899  0.001 ***
## station:Month 13   1.6423 0.17818 1.2348  0.044 *  
## Residual      31   3.1717 0.34411                  
## Total         53   9.2171 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

station comparisons

adonis.pair(vegdist(COI_wide_simple),COI_wide$station)
##     combination SumsOfSqs   MeanSqs  F.Model        R2     P.value
## 1   P12 <-> P22 0.8441661 0.8441661 6.818192 0.3440370 0.001998002
## 2   P12 <-> P28 0.3392037 0.3392037 2.732722 0.1633161 0.000999001
## 3   P12 <-> P38 0.4858547 0.4858547 3.715211 0.2364086 0.002997003
## 4    P12 <-> P4 0.6119630 0.6119630 4.910171 0.2741555 0.000999001
## 5  P12 <-> P402 0.6108084 0.6108084 4.372799 0.2670770 0.003996004
## 6    P12 <-> P8 0.7420610 0.7420610 5.163161 0.2842656 0.000999001
## 7   P22 <-> P28 0.6673237 0.6673237 5.954230 0.2841541 0.000999001
## 8   P22 <-> P38 0.5308458 0.5308458 4.562179 0.2597730 0.000999001
## 9    P22 <-> P4 0.7407450 0.7407450 6.632524 0.3214596 0.000999001
## 10 P22 <-> P402 0.7811113 0.7811113 6.269866 0.3253716 0.000999001
## 11   P22 <-> P8 0.2256816 0.2256816 1.743919 0.1107678 0.051948052
## 12  P28 <-> P38 0.3680673 0.3680673 3.140342 0.1832135 0.000999001
## 13   P28 <-> P4 0.7315381 0.7315381 6.486016 0.3018715 0.000999001
## 14 P28 <-> P402 0.7381860 0.7381860 5.912936 0.2969394 0.000999001
## 15   P28 <-> P8 0.5058739 0.5058739 3.911434 0.2068290 0.001998002
## 16   P38 <-> P4 0.3524163 0.3524163 3.007509 0.1878812 0.000999001
## 17 P38 <-> P402 0.3856336 0.3856336 2.930131 0.1962562 0.005994006
## 18   P38 <-> P8 0.2911071 0.2911071 2.136263 0.1411354 0.009990010
## 19  P4 <-> P402 0.4367466 0.4367466 3.482754 0.2112968 0.000999001
## 20    P4 <-> P8 0.5927355 0.5927355 4.553449 0.2454233 0.001998002
## 21  P402 <-> P8 0.5467140 0.5467140 3.783666 0.2254374 0.002997003
##    P.value.corrected
## 1        0.002797203
## 2        0.001748252
## 3        0.003702180
## 4        0.001748252
## 5        0.004662005
## 6        0.001748252
## 7        0.001748252
## 8        0.001748252
## 9        0.001748252
## 10       0.001748252
## 11       0.051948052
## 12       0.001748252
## 13       0.001748252
## 14       0.001748252
## 15       0.002797203
## 16       0.001748252
## 17       0.006624954
## 18       0.010489510
## 19       0.001748252
## 20       0.002797203
## 21       0.003702180

month comparisons

adonis.pair(vegdist(COI_wide_simple),COI_wide$Month)
##   combination SumsOfSqs   MeanSqs   F.Model         R2     P.value
## 1   04 <-> 07 0.5235489 0.5235489 3.5498196 0.10580741 0.000999001
## 2   04 <-> 09 0.7506380 0.7506380 4.3752162 0.12368027 0.000999001
## 3   04 <-> 10 0.2498979 0.2498979 1.5026453 0.10361181 0.104895105
## 4   07 <-> 09 0.2884489 0.2884489 1.8093841 0.04662233 0.047952048
## 5   07 <-> 10 0.1392565 0.1392565 0.9649775 0.04833351 0.494505495
## 6   09 <-> 10 0.1370137 0.1370137 0.7536787 0.03631543 0.680319680
##   P.value.corrected
## 1       0.002997003
## 2       0.002997003
## 3       0.157342657
## 4       0.095904096
## 5       0.593406593
## 6       0.680319680

run NMDS

COI_NMDSmodel <- metaMDS(COI_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.226526 
## Run 1 stress 0.2447922 
## Run 2 stress 0.252478 
## Run 3 stress 0.2346737 
## Run 4 stress 0.2285766 
## Run 5 stress 0.25453 
## Run 6 stress 0.2276202 
## Run 7 stress 0.2384844 
## Run 8 stress 0.2350877 
## Run 9 stress 0.226526 
## ... Procrustes: rmse 2.106009e-05  max resid 0.0001057249 
## ... Similar to previous best
## Run 10 stress 0.2592979 
## Run 11 stress 0.2296912 
## Run 12 stress 0.2504095 
## Run 13 stress 0.2517093 
## Run 14 stress 0.2485717 
## Run 15 stress 0.2713318 
## Run 16 stress 0.2574965 
## Run 17 stress 0.2265261 
## ... Procrustes: rmse 6.446179e-05  max resid 0.0003724996 
## ... Similar to previous best
## Run 18 stress 0.2327687 
## Run 19 stress 0.2704882 
## Run 20 stress 0.2276842 
## *** Best solution repeated 2 times
COI_NMDSmodel
## 
## Call:
## metaMDS(comm = COI_wide_simple, distance = "bray", trymax = 999) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     COI_wide_simple 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.226526 
## Stress type 1, weak ties
## Best solution was repeated 2 times in 20 tries
## The best solution was from try 0 (metric scaling or null solution)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'COI_wide_simple'

get datascores

COI.data.scores<-scores(COI_NMDSmodel,display="sites")
COI.data.scores<- as.data.frame(COI.data.scores)

add environmental variables back in

# Extract the first 5 columns from df1
first_5_columns <- COI_wide[, 1:4]

# Combine the first 5 columns from df1 with df2
COI.data.scores <- cbind(first_5_columns, COI.data.scores)

plot NMDS

plot datascores

xx <- ggplot(COI.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
xx

extract hulls

unique(COI.data.scores$station)
## [1] P22  P28  P38  P402 P4   P8   P12 
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- COI.data.scores[COI.data.scores$station == "P12", ][chull(COI.data.scores[COI.data.scores$station ==                                                           "P12", c("NMDS1", "NMDS2")]), ]  
P22 <- COI.data.scores[COI.data.scores$station == "P22", ][chull(COI.data.scores[COI.data.scores$station ==                                                           "P22", c("NMDS1", "NMDS2")]), ]  
P28 <- COI.data.scores[COI.data.scores$station == "P28", ][chull(COI.data.scores[COI.data.scores$station ==                                                           "P28", c("NMDS1", "NMDS2")]), ]
P38 <- COI.data.scores[COI.data.scores$station == "P38", ][chull(COI.data.scores[COI.data.scores$station ==                                                           "P38", c("NMDS1", "NMDS2")]), ]  
P4 <- COI.data.scores[COI.data.scores$station == "P4", ][chull(COI.data.scores[COI.data.scores$station ==                                                           "P4", c("NMDS1", "NMDS2")]), ]  
P402 <- COI.data.scores[COI.data.scores$station == "P402", ][chull(COI.data.scores[COI.data.scores$station ==                                                           "P402", c("NMDS1", "NMDS2")]), ]  
P8 <- COI.data.scores[COI.data.scores$station == "P8", ][chull(COI.data.scores[COI.data.scores$station ==                                                           "P8", c("NMDS1", "NMDS2")]), ]  

get hull data

hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8)  #combine grp.a and grp.b
hull.data
##        sample Year Month station        NMDS1        NMDS2
## 12  1809P12Ob   18    09     P12  0.713489965 -0.342848681
## 33  1909P12Ob   19    09     P12  0.455358562 -0.311146940
## 19  1904P12Ob   19    04     P12  0.170347293  0.133811623
## 39  2007P12Ob   20    07     P12  0.457820765  0.104799253
## 26  1907P12Ob   19    07     P12  0.598141034 -0.004805131
## 1   1804P22Ob   18    04     P22 -0.387878578 -0.532164753
## 20  1904P22Ob   19    04     P22 -0.499709794 -0.513621197
## 53  2010P22Ob   20    10     P22 -0.584301914 -0.126212631
## 47  2009P22Ob   20    09     P22 -0.443064235 -0.119353116
## 40  2007P22Ob   20    07     P22 -0.296111165 -0.120300088
## 13  1809P22Ob   18    09     P22 -0.120729672 -0.153832973
## 27  1907P22Ob   19    07     P22 -0.127814701 -0.196333318
## 14  1809P28Ob   18    09     P28  0.402913089 -0.505697376
## 21  1904P28Ob   19    04     P28 -0.102549744 -0.350172019
## 41  2007P28Ob   20    07     P28 -0.057103259 -0.204335220
## 8   1807P28Ob   18    07     P28  0.171593110 -0.098777628
## 54  2010P28Ob   20    10     P28  0.367005687 -0.250153935
## 36  1909P38Ob   19    09     P38  0.188327608 -0.070878325
## 22  1904P38Ob   19    04     P38  0.145904772 -0.160418774
## 42  2007P38Ob   20    07     P38 -0.229100826  0.065102238
## 49  2009P38Ob   20    09     P38 -0.309460006  0.261420853
## 15  1809P38Ob   18    09     P38  0.055886735  0.417824116
## 16 1809P402Ob   18    09    P402  0.336710849  0.079483228
## 30 1907P402Ob   19    07    P402 -0.066377474  0.073864091
## 9  1807P402Ob   18    07    P402 -0.356591639  0.307195379
## 23 1904P402Ob   19    04    P402 -0.378868662  0.551504499
## 50 2009P402Ob   20    09    P402 -0.298333464  0.737893937
## 4  1804P402Ob   18    04    P402 -0.012364394  0.637948606
## 17   1809P4Ob   18    09      P4  0.560735958  0.290274064
## 44   2007P4Ob   20    07      P4  0.004194868  0.180514980
## 31   1907P4Ob   19    07      P4 -0.049093821  0.174387598
## 24   1904P4Ob   19    04      P4 -0.100773759  0.357828001
## 37   1909P4Ob   19    09      P4  0.153803605  0.454601739
## 32   1907P8Ob   19    07      P8 -0.020162991 -0.159863266
## 6    1804P8Ob   18    04      P8 -0.464154873 -0.367341837
## 52   2009P8Ob   20    09      P8 -0.555890115  0.012938163
## 18   1809P8Ob   18    09      P8  0.537098993  0.418450707

Species fitting

vf <- envfit(COI_NMDSmodel, COI_wide, perm = 999)

spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)

# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
##                                NMDS1      NMDS2                   Species
## Acartia longiremis        -0.4822590 -0.4714815        Acartia longiremis
## Balanus glandula          -0.5855395  0.3388121          Balanus glandula
## Balanus rostratus         -0.3268131 -0.5340929         Balanus rostratus
## Crangon crangon           -0.5018258  0.4302424           Crangon crangon
## Discorsopagurus schmitti  -0.4134237 -0.4797192  Discorsopagurus schmitti
## Eucalanus bungii          -0.3503248 -0.5593713          Eucalanus bungii
## Fabia subquadrata         -0.6405165 -0.5734933         Fabia subquadrata
## Glebocarcinus oregonensis -0.4339432 -0.5031502 Glebocarcinus oregonensis
## Heptacarpus tridens       -0.4134237 -0.4797192       Heptacarpus tridens
## Lepidasthenia berkeleyae  -0.2436771  0.7500000  Lepidasthenia berkeleyae
## Metacarcinus gracilis     -0.5549191 -0.3089166     Metacarcinus gracilis
## Metridia pacifica          0.5004458 -0.3889989         Metridia pacifica
## Pagurus beringanus        -0.5880015 -0.2561552        Pagurus beringanus
## Paraeuchaeta elongata      0.6999164 -0.0342669     Paraeuchaeta elongata
## Pasiphaea pacifica         0.4645366 -0.5290621        Pasiphaea pacifica
## Pseudocalanus moultoni    -0.6108149 -0.5860453    Pseudocalanus moultoni
## Rathkea octopunctata      -0.2036083  0.6974968      Rathkea octopunctata
## Sarsia bella              -0.4134237 -0.4797192              Sarsia bella
## Scyra acutifrons          -0.4922006 -0.5224270          Scyra acutifrons
## Stomotoca atra            -0.5394189 -0.4749028            Stomotoca atra
##                           Pvalues R_squared
## Acartia longiremis          0.001 0.3511338
## Balanus glandula            0.001 0.3532810
## Balanus rostratus           0.001 0.3026506
## Crangon crangon             0.001 0.3372921
## Discorsopagurus schmitti    0.001 0.3095886
## Eucalanus bungii            0.001 0.3362778
## Fabia subquadrata           0.001 0.5705882
## Glebocarcinus oregonensis   0.001 0.3407883
## Heptacarpus tridens         0.001 0.3095886
## Lepidasthenia berkeleyae    0.001 0.4800564
## Metacarcinus gracilis       0.001 0.3113756
## Metridia pacifica           0.001 0.3101416
## Pagurus beringanus          0.001 0.3175485
## Paraeuchaeta elongata       0.001 0.3790695
## Pasiphaea pacifica          0.001 0.3826542
## Pseudocalanus moultoni      0.001 0.5531329
## Rathkea octopunctata        0.001 0.4075549
## Sarsia bella                0.001 0.3095886
## Scyra acutifrons            0.001 0.3976998
## Stomotoca atra              0.001 0.3987141
COI_Species_plot <- ggplot(COI.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour =  station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
  geom_segment(data = spp.scrs,size=0.4,
               aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
               arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+ 
  geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
  annotate("text", label = "2D Stress: 0.227", x = 0.55, y = 0.8, size = 5, colour = "black")+
  geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
                  size = 2, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.01, "lines"), label.size = 0.02)
COI_Species_plot

save plot

setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "COI_NMDS_oblique.png", plot = COI_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)

Cop16S

Data Setup

Remove non-zooplanton phyla

unique(Cop16S_counts$Phylum)
##  [1] "Arthropoda"      NA                "Mollusca"        "Phoronida"      
##  [5] "Annelida"        "Nemertea"        "Cnidaria"        "Chaetognatha"   
##  [9] "Echinodermata"   "Nematoda"        "Bryozoa"         "Acanthocephala" 
## [13] "Platyhelminthes"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p')

# Remove those levels and drop them from the factor
Cop16S_counts <- droplevels(Cop16S_counts[!Cop16S_counts$Phylum %in% levels_to_remove,])

subset to copepods only

Cop16S_counts<-subset(Cop16S_counts,Class=="Hexanauplia")

remove non-species rows

Cop16S_comp<- Cop16S_counts[complete.cases(Cop16S_counts), ]

convert to presence-absence

Cop16S_comp_pa<-Cop16S_comp %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

remove excess columns

Cop16S_comp_pa <- Cop16S_comp_pa[ -c(1:7) ]

wide to long format

Cop16S_long<-melt(Cop16S_comp_pa, id.vars=c("Species"))

add up multiple lines per station and convert back to presence-absence

Cop16S_long_sum <- Cop16S_long %>%
  group_by(Species,variable) %>%
  summarise(
    value = sum(value))

Cop16S_long_sum<-Cop16S_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

split code string up to extract metadata

Cop16S_long_sum$variable<-str_split_fixed(Cop16S_long_sum$variable,"_",4)

remove samples of non-interest

unique(Cop16S_long_sum$variable[,3])
##  [1] "P12"   "P22"   "P28"   "P38"   "P402"  "P4"    "P8"    "CA042" "P105" 
## [10] "P123"  "P132"  "P136"  "P381"  "P7"    ""
unique(Cop16S_long_sum$variable[,2])
##  [1] "1804"     "1806"     "1807"     "1809"     "1904"     "1907"    
##  [7] "1909"     "2007"     "2009"     "2010"     "Control1" "Control2"
#remove excess stations
levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
Cop16S_long_sum <- droplevels(Cop16S_long_sum[!Cop16S_long_sum$variable[,3] %in% levels_to_remove,])

split up replicate columns

unique(Cop16S_long_sum$variable[,4])
## [1] "Ob"  "Ve"  "ObB" "VeA" "ObA" "VeB"
Cop16S_long_sum <- Cop16S_long_sum %>%
  mutate(
    tow_type = substr(variable[,4], 1, 2),
    replicate = substr(variable[,4], 3, nchar(variable[,4]))
  )

split up year and month columns

Cop16S_long_sum <- Cop16S_long_sum %>%
  mutate(
    Year = substr(variable[,2], 1, 2),
    Month = substr(variable[,2], 3, 4)
  )

make station column

Cop16S_long_sum$station<-Cop16S_long_sum$variable[,3]

remove vertical samples

Cop16S_long_sum<-subset(Cop16S_long_sum,tow_type=="Ob")

merge replicates (and oblique and vertical tows)

colnames(Cop16S_long_sum)
## [1] "Species"   "variable"  "value"     "tow_type"  "replicate" "Year"     
## [7] "Month"     "station"
Cop16S_long_sum$sample<-paste(Cop16S_long_sum$Year,Cop16S_long_sum$Month,Cop16S_long_sum$station,Cop16S_long_sum$tow_type,sep="")

Cop16S_long_sum <- Cop16S_long_sum %>%
  group_by(Year,Month,station,sample,Species) %>%
  summarise(
    value = sum(value))
#convert back to presence-absence
Cop16S_long_sum<-Cop16S_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

back to wide format

Cop16S_wide<-dcast(Cop16S_long_sum, sample+Year+Month+station ~ Species )

remove any columns with all zeroes

target_cols <- 6:ncol(Cop16S_wide)

non_zero_cols <- target_cols[colSums(Cop16S_wide[, target_cols]) != 0]

Cop16S_wide <- Cop16S_wide[, c(1:5, non_zero_cols)]

remove environmental data

Cop16S_wide_simple<-Cop16S_wide[ , 5:17]

Make environmental columns categorical

# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")

# Convert the specified columns to factors
Cop16S_wide <- Cop16S_wide %>%
  mutate_at(vars(one_of(cols_to_factor)), as.factor)

PERMANOVA

dist<-vegdist(Cop16S_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=Cop16S_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist ~ station * Month, data = Cop16S_wide, permutations = 999, method = "bray")
##               Df SumOfSqs      R2      F Pr(>F)    
## station        6  0.82213 0.32630 3.6804  0.001 ***
## Month          3  0.19740 0.07835 1.7673  0.099 .  
## station:Month 13  0.60649 0.24072 1.2531  0.229    
## Residual      24  0.89352 0.35464                  
## Total         46  2.51954 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

station comparisons

adonis.pair(vegdist(Cop16S_wide_simple),Cop16S_wide$station)
##     combination    SumsOfSqs      MeanSqs     F.Model           R2     P.value
## 1   P12 <-> P22  0.459598705  0.459598705 13.07684176  0.501473372 0.000999001
## 2   P12 <-> P28  0.058984168  0.058984168  1.21783820  0.085655652 0.336663337
## 3   P12 <-> P38  0.036826815  0.036826815  1.08530367  0.082940656 0.383616384
## 4    P12 <-> P4  0.103127635  0.103127635  2.45381936  0.197033479 0.086913087
## 5  P12 <-> P402  0.049619527  0.049619527  1.18099498  0.096953901 0.347652348
## 6    P12 <-> P8  0.097116997  0.097116997  2.10779880  0.160804940 0.154845155
## 7   P22 <-> P28  0.239546946  0.239546946  5.47711809  0.281207829 0.016983017
## 8   P22 <-> P38  0.323144638  0.323144638 10.77549533  0.453218542 0.001998002
## 9    P22 <-> P4  0.170734702  0.170734702  4.66092680  0.297615004 0.003996004
## 10 P22 <-> P402  0.411842178  0.411842178 11.11002693  0.480744872 0.000999001
## 11   P22 <-> P8  0.185915832  0.185915832  4.55775099  0.275263892 0.030969031
## 12  P28 <-> P38  0.008981190  0.008981190  0.20753116  0.015713093 0.835164835
## 13   P28 <-> P4  0.067694554  0.067694554  1.29349839  0.105218088 0.322677323
## 14 P28 <-> P402  0.093247579  0.093247579  1.81189199  0.131183475 0.171828172
## 15   P28 <-> P8 -0.002244164 -0.002244164 -0.04066551 -0.003400316 0.928071928
## 16   P38 <-> P4  0.081850068  0.081850068  2.31718323  0.188126066 0.121878122
## 17 P38 <-> P402  0.068078615  0.068078615  1.89527407  0.146974315 0.246753247
## 18   P38 <-> P8  0.022882636  0.022882636  0.57234843  0.049458278 0.492507493
## 19  P4 <-> P402  0.129215853  0.129215853  2.84889115  0.240435254 0.068931069
## 20    P4 <-> P8  0.053734225  0.053734225  1.06787494  0.106067561 0.407592408
## 21  P402 <-> P8  0.122224665  0.122224665  2.47037891  0.198099747 0.095904096
##    P.value.corrected
## 1         0.01048951
## 2         0.45629371
## 3         0.47387906
## 4         0.22377622
## 5         0.45629371
## 6         0.29561348
## 7         0.07132867
## 8         0.01398601
## 9         0.02097902
## 10        0.01048951
## 11        0.10839161
## 12        0.87692308
## 13        0.45629371
## 14        0.30069930
## 15        0.92807193
## 16        0.25594406
## 17        0.39860140
## 18        0.54435039
## 19        0.20679321
## 20        0.47552448
## 21        0.22377622

month comparisons

adonis.pair(vegdist(Cop16S_wide_simple),Cop16S_wide$Month)
##   combination    SumsOfSqs      MeanSqs      F.Model           R2   P.value
## 1   04 <-> 07 0.0396521431 0.0396521431 0.8590613962 3.455727e-02 0.4235764
## 2   04 <-> 09 0.0348810420 0.0348810420 0.5427633964 1.970621e-02 0.6293706
## 3   04 <-> 10 0.0669444714 0.0669444714 1.0879229397 9.811783e-02 0.3326673
## 4   07 <-> 09 0.0000125265 0.0000125265 0.0002301713 6.974840e-06 0.9320679
## 5   07 <-> 10 0.0586162538 0.0586162538 1.3871373305 7.977951e-02 0.2717283
## 6   09 <-> 10 0.0565087073 0.0565087073 0.8236763897 4.155013e-02 0.4895105
##   P.value.corrected
## 1         0.7342657
## 2         0.7552448
## 3         0.7342657
## 4         0.9320679
## 5         0.7342657
## 6         0.7342657

run NMDS

Cop16S_NMDSmodel <- metaMDS(Cop16S_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.09415055 
## Run 1 stress 0.1209909 
## Run 2 stress 0.09415044 
## ... New best solution
## ... Procrustes: rmse 7.711639e-05  max resid 0.000152219 
## ... Similar to previous best
## Run 3 stress 0.09681938 
## Run 4 stress 0.1219042 
## Run 5 stress 0.1080135 
## Run 6 stress 0.1082108 
## Run 7 stress 0.09415049 
## ... Procrustes: rmse 0.0001767031  max resid 0.000344989 
## ... Similar to previous best
## Run 8 stress 0.1079082 
## Run 9 stress 0.1370283 
## Run 10 stress 0.1304382 
## Run 11 stress 0.09415053 
## ... Procrustes: rmse 6.856108e-05  max resid 0.0001420486 
## ... Similar to previous best
## Run 12 stress 0.1199078 
## Run 13 stress 0.09429338 
## ... Procrustes: rmse 0.003202509  max resid 0.01605773 
## Run 14 stress 0.1360864 
## Run 15 stress 0.09377016 
## ... New best solution
## ... Procrustes: rmse 0.02075863  max resid 0.1284749 
## Run 16 stress 0.138071 
## Run 17 stress 0.1219042 
## Run 18 stress 0.0939106 
## ... Procrustes: rmse 0.003483345  max resid 0.01834119 
## Run 19 stress 0.1082108 
## Run 20 stress 0.1079082 
## Run 21 stress 0.09681938 
## Run 22 stress 0.1081069 
## Run 23 stress 0.09415048 
## ... Procrustes: rmse 0.02073858  max resid 0.1284859 
## Run 24 stress 0.0941504 
## ... Procrustes: rmse 0.02077492  max resid 0.128555 
## Run 25 stress 0.1157494 
## Run 26 stress 0.09677197 
## Run 27 stress 0.09429336 
## Run 28 stress 0.09677196 
## Run 29 stress 0.09415039 
## ... Procrustes: rmse 0.02077787  max resid 0.1285601 
## Run 30 stress 0.1082108 
## Run 31 stress 0.0937701 
## ... New best solution
## ... Procrustes: rmse 9.473911e-05  max resid 0.0002575655 
## ... Similar to previous best
## *** Best solution repeated 1 times
Cop16S_NMDSmodel
## 
## Call:
## metaMDS(comm = Cop16S_wide_simple, distance = "bray", trymax = 999) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     Cop16S_wide_simple 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.0937701 
## Stress type 1, weak ties
## Best solution was repeated 1 time in 31 tries
## The best solution was from try 31 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'Cop16S_wide_simple'

get datascores

Cop16S.data.scores<-scores(Cop16S_NMDSmodel,display="sites")
Cop16S.data.scores<- as.data.frame(Cop16S.data.scores)

add environmental variables back in

# Extract the first 5 columns from df1
first_5_columns <- Cop16S_wide[, 1:4]

# Combine the first 5 columns from df1 with df2
Cop16S.data.scores <- cbind(first_5_columns, Cop16S.data.scores)

plot NMDS

plot datascores

xx <- ggplot(Cop16S.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
xx

extract hulls

unique(Cop16S.data.scores$station)
## [1] P12  P22  P28  P38  P402 P4   P8  
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- Cop16S.data.scores[Cop16S.data.scores$station == "P12", ][chull(Cop16S.data.scores[Cop16S.data.scores$station ==                                                           "P12", c("NMDS1", "NMDS2")]), ]  
P22 <- Cop16S.data.scores[Cop16S.data.scores$station == "P22", ][chull(Cop16S.data.scores[Cop16S.data.scores$station ==                                                           "P22", c("NMDS1", "NMDS2")]), ]  
P28 <- Cop16S.data.scores[Cop16S.data.scores$station == "P28", ][chull(Cop16S.data.scores[Cop16S.data.scores$station ==                                                           "P28", c("NMDS1", "NMDS2")]), ]
P38 <- Cop16S.data.scores[Cop16S.data.scores$station == "P38", ][chull(Cop16S.data.scores[Cop16S.data.scores$station ==                                                           "P38", c("NMDS1", "NMDS2")]), ]  
P4 <- Cop16S.data.scores[Cop16S.data.scores$station == "P4", ][chull(Cop16S.data.scores[Cop16S.data.scores$station ==                                                           "P4", c("NMDS1", "NMDS2")]), ]  
P402 <- Cop16S.data.scores[Cop16S.data.scores$station == "P402", ][chull(Cop16S.data.scores[Cop16S.data.scores$station ==                                                           "P402", c("NMDS1", "NMDS2")]), ]  
P8 <- Cop16S.data.scores[Cop16S.data.scores$station == "P8", ][chull(Cop16S.data.scores[Cop16S.data.scores$station ==                                                           "P8", c("NMDS1", "NMDS2")]), ]  

get hull data

hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8)  #combine grp.a and grp.b
hull.data
##        sample Year Month station       NMDS1         NMDS2
## 11  1809P12Ob   18    09     P12  0.09483426 -0.4055790381
## 1   1804P12Ob   18    04     P12 -0.58838114  0.0001825864
## 8   1807P12Ob   18    07     P12 -0.58838125  0.0001850032
## 39  2009P12Ob   20    09     P12 -0.24745734  0.1907048364
## 27  1909P12Ob   19    09     P12 -0.02759480 -0.0330026043
## 2   1804P22Ob   18    04     P22  0.67566518 -0.1702912021
## 22  1907P22Ob   19    07     P22  0.46873051 -0.1283340394
## 18  1904P22Ob   19    04     P22  0.46873051 -0.1283340394
## 46  2010P22Ob   20    10     P22 -0.01286390  0.4407132778
## 12  1809P22Ob   18    09     P22  0.54822128  0.2182857927
## 28  1909P22Ob   19    09     P22  0.56936244  0.1569100030
## 34  2007P28Ob   20    07     P28  0.46738037 -0.2617466878
## 13  1809P28Ob   18    09     P28 -0.58838111  0.0001819462
## 47  2010P28Ob   20    10     P28 -0.58838119  0.0001837652
## 29  1909P28Ob   19    09     P28  0.01305402  0.2557123435
## 9   1807P28Ob   18    07     P28  0.17889316  0.2350923578
## 3   1804P28Ob   18    04     P28  0.42744692  0.0867423072
## 19  1904P38Ob   19    04     P38 -0.02759480 -0.0330026043
## 42  2009P38Ob   20    09     P38 -0.20214800 -0.0299290709
## 14  1809P38Ob   18    09     P38 -0.58838113  0.0001824058
## 4   1804P38Ob   18    04     P38  0.01305402  0.2557123435
## 30  1909P38Ob   19    09     P38  0.34162097  0.2308266212
## 43 2009P402Ob   20    09    P402  0.25438110 -0.8030822135
## 15 1809P402Ob   18    09    P402 -0.58838113  0.0001823084
## 5  1804P402Ob   18    04    P402 -0.20214800 -0.0299290709
## 25 1907P402Ob   19    07    P402 -0.20214800 -0.0299290709
## 16   1809P4Ob   18    09      P4  0.35269132 -0.2965862860
## 6    1804P4Ob   18    04      P4 -0.58838125  0.0001850558
## 37   2007P4Ob   20    07      P4  0.17889316  0.2350923578
## 17   1809P8Ob   18    09      P8 -0.11136462 -0.2712000093
## 45   2009P8Ob   20    09      P8 -0.58838125  0.0001850491
## 31   1909P8Ob   19    09      P8  0.01305402  0.2557123435
## 26   1907P8Ob   19    07      P8  0.34162097  0.2308266212
## 7    1804P8Ob   18    04      P8  0.48626850  0.0780765961

Species fitting

vf <- envfit(Cop16S_NMDSmodel, Cop16S_wide, perm = 999)

spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)

# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
##                          NMDS1         NMDS2            Species Pvalues
## Acartia longiremis  0.31076910  0.7500000000 Acartia longiremis   0.001
## Calanus glacialis   0.57387237  0.0102990728  Calanus glacialis   0.001
## Calanus marshallae  0.68216569 -0.0006285338 Calanus marshallae   0.001
## Eucalanus bungii    0.61255339 -0.4684059248   Eucalanus bungii   0.001
## Metridia lucens     0.71595441  0.1008227318    Metridia lucens   0.001
## Metridia pacifica  -0.05647834  0.5267238704  Metridia pacifica   0.015
##                    R_squared
## Acartia longiremis 0.7388724
## Calanus glacialis  0.3693206
## Calanus marshallae 0.5216908
## Eucalanus bungii   0.6666176
## Metridia lucens    0.5860464
## Metridia pacifica  0.3146037
Cop16S_Species_plot <- ggplot(Cop16S.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour =  station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
  geom_segment(data = spp.scrs,size=0.4,
               aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
               arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+ 
  geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
  annotate("text", label = "2D Stress: 0.92", x = 0.55, y = 0.8, size = 5, colour = "black")+
  geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
                  size = 3, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.16, "lines"), label.size = 0.05)
Cop16S_Species_plot

save plot

setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "Cop16S_NMDS_oblique.png", plot = Cop16S_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)

Mol16S

Data Setup

Remove non-zooplanton phyla

unique(Mol16S_counts$Phylum)
##  [1] "Arthropoda"          "Nemertea"            "Mollusca"           
##  [4] "Chordata"            "Annelida"            "Cnidaria"           
##  [7] "Echinodermata"       NA                    "Rotifera"           
## [10] "Chlorophyta"         "Bacillariophyta"     "Bryozoa"            
## [13] "Gastrotricha"        "Phoronida"           "Entoprocta"         
## [16] "Hemichordata"        "Bacteroidota"        "Platyhelminthes"    
## [19] "Choanoflagellata__p"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p')

# Remove those levels and drop them from the factor
Mol16S_counts <- droplevels(Mol16S_counts[!Mol16S_counts$Phylum %in% levels_to_remove,])

subset to copepods only

Mol16S_counts<-subset(Mol16S_counts,Phylum=="Mollusca")

remove non-species rows

Mol16S_comp<- Mol16S_counts[complete.cases(Mol16S_counts), ]

convert to presence-absence

Mol16S_comp_pa<-Mol16S_comp %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

remove excess columns

Mol16S_comp_pa <- Mol16S_comp_pa[ -c(1:7) ]

wide to long format

Mol16S_long<-melt(Mol16S_comp_pa, id.vars=c("Species"))

add up multiple lines per station and convert back to presence-absence

Mol16S_long_sum <- Mol16S_long %>%
  group_by(Species,variable) %>%
  summarise(
    value = sum(value))

Mol16S_long_sum<-Mol16S_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

split code string up to extract metadata

Mol16S_long_sum$variable<-str_split_fixed(Mol16S_long_sum$variable,"_",4)

remove samples of non-interest

unique(Mol16S_long_sum$variable[,3])
##  [1] "P12"   "P22"   "P28"   "P38"   "P402"  "P4"    "P8"    "CA042" "P105" 
## [10] "P123"  "P132"  "P136"  "P381"  "P7"    ""
unique(Mol16S_long_sum$variable[,2])
##  [1] "1804"     "1806"     "1807"     "1809"     "1904"     "1907"    
##  [7] "1909"     "2007"     "2009"     "2010"     "Control2"
#remove excess stations
levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
Mol16S_long_sum <- droplevels(Mol16S_long_sum[!Mol16S_long_sum$variable[,3] %in% levels_to_remove,])

split up replicate columns

unique(Mol16S_long_sum$variable[,4])
## [1] "Ob"  "VeA" "Ve"  "ObB" "ObA" "VeB"
Mol16S_long_sum <- Mol16S_long_sum %>%
  mutate(
    tow_type = substr(variable[,4], 1, 2),
    replicate = substr(variable[,4], 3, nchar(variable[,4]))
  )

split up year and month columns

Mol16S_long_sum <- Mol16S_long_sum %>%
  mutate(
    Year = substr(variable[,2], 1, 2),
    Month = substr(variable[,2], 3, 4)
  )

make station column

Mol16S_long_sum$station<-Mol16S_long_sum$variable[,3]

remove vertical samples

Mol16S_long_sum<-subset(Mol16S_long_sum,tow_type=="Ob")

merge replicates

colnames(Mol16S_long_sum)
## [1] "Species"   "variable"  "value"     "tow_type"  "replicate" "Year"     
## [7] "Month"     "station"
Mol16S_long_sum$sample<-paste(Mol16S_long_sum$Year,Mol16S_long_sum$Month,Mol16S_long_sum$station,Mol16S_long_sum$tow_type,sep="")

Mol16S_long_sum <- Mol16S_long_sum %>%
  group_by(Year,Month,station,sample,Species) %>%
  summarise(
    value = sum(value))
#convert back to presence-absence
Mol16S_long_sum<-Mol16S_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

back to wide format

Mol16S_wide<-dcast(Mol16S_long_sum, sample+Year+Month+station ~ Species )

remove any columns with all zeroes

target_cols <- 6:ncol(Mol16S_wide)

non_zero_cols <- target_cols[colSums(Mol16S_wide[, target_cols]) != 0]

Mol16S_wide <- Mol16S_wide[, c(1:5, non_zero_cols)]

remove rows with all zeroes

row_sums <- rowSums(Mol16S_wide[, 5:39])

# Create a logical vector to identify rows where the sum is not zero
non_zero_rows <- row_sums != 0

# Subset the data frame to keep only rows with a non-zero sum
Mol16S_wide <- Mol16S_wide[non_zero_rows, ]

remove environmental data

Mol16S_wide_simple<-Mol16S_wide[ , 5:39]

Make environmental columns categorical

# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")

# Convert the specified columns to factors
Mol16S_wide <- Mol16S_wide %>%
  mutate_at(vars(one_of(cols_to_factor)), as.factor)

PERMANOVA

dist<-vegdist(Mol16S_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=Mol16S_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist ~ station * Month, data = Mol16S_wide, permutations = 999, method = "bray")
##               Df SumOfSqs      R2      F Pr(>F)    
## station        6   3.6598 0.24908 2.8261  0.001 ***
## Month          3   2.1130 0.14381 3.2634  0.001 ***
## station:Month 12   2.6613 0.18112 1.0275  0.427    
## Residual      29   6.2591 0.42599                  
## Total         50  14.6932 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

station comparisons

adonis.pair(vegdist(Mol16S_wide_simple),Mol16S_wide$station)
##     combination SumsOfSqs   MeanSqs  F.Model         R2     P.value
## 1   P12 <-> P22 0.8989843 0.8989843 2.922938 0.18356774 0.030969031
## 2   P12 <-> P28 1.2751264 1.2751264 4.830349 0.27090603 0.006993007
## 3   P12 <-> P38 0.8316399 0.8316399 3.269490 0.20095838 0.020979021
## 4    P12 <-> P4 0.5599412 0.5599412 1.517965 0.10455771 0.152847153
## 5  P12 <-> P402 0.5977402 0.5977402 2.367952 0.15408379 0.048951049
## 6    P12 <-> P8 0.9980493 0.9980493 3.386856 0.19479402 0.005994006
## 7   P22 <-> P28 0.4178084 0.4178084 1.879406 0.13540968 0.158841159
## 8   P22 <-> P38 0.2467244 0.2467244 1.164406 0.08845106 0.337662338
## 9    P22 <-> P4 0.4297562 0.4297562 1.279250 0.09633451 0.291708292
## 10 P22 <-> P402 0.5846626 0.5846626 2.786856 0.18846847 0.014985015
## 11   P22 <-> P8 0.5367516 0.5367516 2.075791 0.13769034 0.073926074
## 12  P28 <-> P38 0.3112528 0.3112528 1.890076 0.13607382 0.125874126
## 13   P28 <-> P4 0.4491097 0.4491097 1.555452 0.11474734 0.205794206
## 14 P28 <-> P402 0.7441091 0.7441091 4.576834 0.27609821 0.008991009
## 15   P28 <-> P8 0.3637451 0.3637451 1.691859 0.11515622 0.141858142
## 16   P38 <-> P4 0.5849334 0.5849334 2.101714 0.14903960 0.074925075
## 17 P38 <-> P402 0.1958246 0.1958246 1.286951 0.09685828 0.288711289
## 18   P38 <-> P8 0.5459444 0.5459444 2.658231 0.16976574 0.006993007
## 19  P4 <-> P402 0.8145723 0.8145723 2.949033 0.19727249 0.002997003
## 20    P4 <-> P8 0.3713381 0.3713381 1.160826 0.08197447 0.369630370
## 21  P402 <-> P8 0.9373031 0.9373031 4.607175 0.26166464 0.000999001
##    P.value.corrected
## 1         0.07226107
## 2         0.02937063
## 3         0.05506993
## 4         0.20847902
## 5         0.10279720
## 6         0.02937063
## 7         0.20847902
## 8         0.35454545
## 9         0.32241443
## 10        0.04495504
## 11        0.13111888
## 12        0.20333513
## 13        0.25421637
## 14        0.03146853
## 15        0.20847902
## 16        0.13111888
## 17        0.32241443
## 18        0.02937063
## 19        0.02937063
## 20        0.36963037
## 21        0.02097902

month comparisons

adonis.pair(vegdist(Mol16S_wide_simple),Mol16S_wide$Month)
##   combination SumsOfSqs   MeanSqs   F.Model         R2     P.value
## 1   04 <-> 07 1.2408784 1.2408784 5.1724181 0.15136237 0.000999001
## 2   04 <-> 09 1.0716234 1.0716234 3.8210471 0.11297838 0.001998002
## 3   04 <-> 10 0.4741938 0.4741938 2.0336954 0.14491517 0.211788212
## 4   07 <-> 09 0.4805478 0.4805478 1.7206800 0.04685861 0.127872128
## 5   07 <-> 10 0.3403936 0.3403936 1.3913116 0.07565048 0.239760240
## 6   09 <-> 10 0.2248726 0.2248726 0.7208019 0.03850272 0.600399600
##   P.value.corrected
## 1       0.005994006
## 2       0.005994006
## 3       0.287712288
## 4       0.255744256
## 5       0.287712288
## 6       0.600399600

run NMDS

Mol16S_NMDSmodel <- metaMDS(Mol16S_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.1420235 
## Run 1 stress 0.1487572 
## Run 2 stress 0.1372033 
## ... New best solution
## ... Procrustes: rmse 0.09545086  max resid 0.3450041 
## Run 3 stress 0.1446668 
## Run 4 stress 0.1516894 
## Run 5 stress 0.1364151 
## ... New best solution
## ... Procrustes: rmse 0.0676417  max resid 0.334001 
## Run 6 stress 0.1382545 
## Run 7 stress 0.1321018 
## ... New best solution
## ... Procrustes: rmse 0.09725584  max resid 0.358806 
## Run 8 stress 0.1575945 
## Run 9 stress 0.1496615 
## Run 10 stress 0.1398927 
## Run 11 stress 0.1416548 
## Run 12 stress 0.141882 
## Run 13 stress 0.1494222 
## Run 14 stress 0.1409771 
## Run 15 stress 0.1353404 
## Run 16 stress 0.1341848 
## Run 17 stress 0.1380038 
## Run 18 stress 0.1381198 
## Run 19 stress 0.1407499 
## Run 20 stress 0.1454678 
## Run 21 stress 0.1412147 
## Run 22 stress 0.1423261 
## Run 23 stress 0.137248 
## Run 24 stress 0.1447911 
## Run 25 stress 0.144383 
## Run 26 stress 0.1455127 
## Run 27 stress 0.1416275 
## Run 28 stress 0.148297 
## Run 29 stress 0.1447304 
## Run 30 stress 0.1459943 
## Run 31 stress 0.1512126 
## Run 32 stress 0.1521908 
## Run 33 stress 0.1424321 
## Run 34 stress 0.1484972 
## Run 35 stress 0.1661877 
## Run 36 stress 0.1421265 
## Run 37 stress 0.1400125 
## Run 38 stress 0.1447517 
## Run 39 stress 0.1423512 
## Run 40 stress 0.1472231 
## Run 41 stress 0.1363783 
## Run 42 stress 0.1419928 
## Run 43 stress 0.1331822 
## Run 44 stress 0.1397504 
## Run 45 stress 0.1445812 
## Run 46 stress 0.1435381 
## Run 47 stress 0.1436104 
## Run 48 stress 0.1422559 
## Run 49 stress 0.1471506 
## Run 50 stress 0.1418283 
## Run 51 stress 0.1353389 
## Run 52 stress 0.1575562 
## Run 53 stress 0.138287 
## Run 54 stress 0.1507768 
## Run 55 stress 0.1438503 
## Run 56 stress 0.1525115 
## Run 57 stress 0.1433958 
## Run 58 stress 0.1425744 
## Run 59 stress 0.1397073 
## Run 60 stress 0.1332761 
## Run 61 stress 0.1412986 
## Run 62 stress 0.1476887 
## Run 63 stress 0.1394618 
## Run 64 stress 0.146797 
## Run 65 stress 0.1418524 
## Run 66 stress 0.151318 
## Run 67 stress 0.1380029 
## Run 68 stress 0.1412927 
## Run 69 stress 0.1456148 
## Run 70 stress 0.1412924 
## Run 71 stress 0.1408664 
## Run 72 stress 0.1353389 
## Run 73 stress 0.136697 
## Run 74 stress 0.1433208 
## Run 75 stress 0.1424781 
## Run 76 stress 0.1532513 
## Run 77 stress 0.1477871 
## Run 78 stress 0.133277 
## Run 79 stress 0.1400122 
## Run 80 stress 0.1773601 
## Run 81 stress 0.1438078 
## Run 82 stress 0.1379331 
## Run 83 stress 0.1353391 
## Run 84 stress 0.1352969 
## Run 85 stress 0.136009 
## Run 86 stress 0.1473051 
## Run 87 stress 0.1353392 
## Run 88 stress 0.1435408 
## Run 89 stress 0.1364491 
## Run 90 stress 0.1371089 
## Run 91 stress 0.1379339 
## Run 92 stress 0.1472542 
## Run 93 stress 0.1689747 
## Run 94 stress 0.1431744 
## Run 95 stress 0.1382259 
## Run 96 stress 0.1422429 
## Run 97 stress 0.1451987 
## Run 98 stress 0.1418413 
## Run 99 stress 0.141887 
## Run 100 stress 0.1391502 
## Run 101 stress 0.1404966 
## Run 102 stress 0.1465317 
## Run 103 stress 0.138387 
## Run 104 stress 0.1444182 
## Run 105 stress 0.1404098 
## Run 106 stress 0.1488803 
## Run 107 stress 0.1477507 
## Run 108 stress 0.1379817 
## Run 109 stress 0.1321025 
## ... Procrustes: rmse 0.001225722  max resid 0.006270148 
## ... Similar to previous best
## *** Best solution repeated 1 times
Mol16S_NMDSmodel
## 
## Call:
## metaMDS(comm = Mol16S_wide_simple, distance = "bray", trymax = 999) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     Mol16S_wide_simple 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.1321018 
## Stress type 1, weak ties
## Best solution was repeated 1 time in 109 tries
## The best solution was from try 7 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'Mol16S_wide_simple'

get datascores

Mol16S.data.scores<-scores(Mol16S_NMDSmodel,display="sites")
Mol16S.data.scores<- as.data.frame(Mol16S.data.scores)

add environmental variables back in

# Extract the first 5 columns from df1
first_5_columns <- Mol16S_wide[, 1:4]

# Combine the first 5 columns from df1 with df2
Mol16S.data.scores <- cbind(first_5_columns, Mol16S.data.scores)

plot NMDS

plot datascores

xx <- ggplot(Mol16S.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
xx

extract hulls

unique(Mol16S.data.scores$station)
## [1] P12  P22  P28  P38  P402 P4   P8  
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- Mol16S.data.scores[Mol16S.data.scores$station == "P12", ][chull(Mol16S.data.scores[Mol16S.data.scores$station ==                                                           "P12", c("NMDS1", "NMDS2")]), ]  
P22 <- Mol16S.data.scores[Mol16S.data.scores$station == "P22", ][chull(Mol16S.data.scores[Mol16S.data.scores$station ==                                                           "P22", c("NMDS1", "NMDS2")]), ]  
P28 <- Mol16S.data.scores[Mol16S.data.scores$station == "P28", ][chull(Mol16S.data.scores[Mol16S.data.scores$station ==                                                           "P28", c("NMDS1", "NMDS2")]), ]
P38 <- Mol16S.data.scores[Mol16S.data.scores$station == "P38", ][chull(Mol16S.data.scores[Mol16S.data.scores$station ==                                                           "P38", c("NMDS1", "NMDS2")]), ]  
P4 <- Mol16S.data.scores[Mol16S.data.scores$station == "P4", ][chull(Mol16S.data.scores[Mol16S.data.scores$station ==                                                           "P4", c("NMDS1", "NMDS2")]), ]  
P402 <- Mol16S.data.scores[Mol16S.data.scores$station == "P402", ][chull(Mol16S.data.scores[Mol16S.data.scores$station ==                                                           "P402", c("NMDS1", "NMDS2")]), ]  
P8 <- Mol16S.data.scores[Mol16S.data.scores$station == "P8", ][chull(Mol16S.data.scores[Mol16S.data.scores$station ==                                                           "P8", c("NMDS1", "NMDS2")]), ]  

get hull data

hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8)  #combine grp.a and grp.b
hull.data
##        sample Year Month station       NMDS1        NMDS2
## 1   1804P12Ob   18    04     P12 -0.60581099 -1.016735934
## 45  2009P12Ob   20    09     P12 -1.09573621  0.156920817
## 26  1907P12Ob   19    07     P12 -1.09573621  0.156920817
## 8   1807P12Ob   18    07     P12 -1.09573621  0.156920817
## 33  1909P12Ob   19    09     P12 -0.27812313  0.758923125
## 13  1809P12Ob   18    09     P12  1.04468969  0.563959359
## 2   1804P22Ob   18    04     P22  1.64762904 -0.369985349
## 46  2009P22Ob   20    09     P22 -0.42126733  0.164997664
## 52  2010P22Ob   20    10     P22  0.01754380  0.729211108
## 14  1809P22Ob   18    09     P22  0.43615776  0.522299965
## 3   1804P28Ob   18    04     P28  0.87169812 -0.420405215
## 35  1909P28Ob   19    09     P28  0.72774179 -0.620931643
## 9   1807P28Ob   18    07     P28 -0.45120819  0.305980045
## 28  1907P28Ob   19    07     P28  0.34931289  0.186183521
## 15  1809P28Ob   18    09     P28  0.42679200  0.132118635
## 4   1804P38Ob   18    04     P38  0.58249081  0.008958916
## 48  2009P38Ob   20    09     P38  0.21885311 -0.084867714
## 22  1904P38Ob   19    04     P38  0.03445329 -0.055005676
## 41  2007P38Ob   20    07     P38 -0.15209743  0.059778258
## 29  1907P38Ob   19    07     P38 -0.45550326  0.372213385
## 36  1909P38Ob   19    09     P38  0.19644808  0.860695144
## 5  1804P402Ob   18    04    P402 -0.07954775  0.012289395
## 23 1904P402Ob   19    04    P402 -0.68405829 -0.049143775
## 49 2009P402Ob   20    09    P402 -0.48394532  0.471275917
## 17 1809P402Ob   18    09    P402 -0.18989465  0.907442989
## 30 1907P402Ob   19    07    P402  0.09962929  0.707443731
## 6    1804P4Ob   18    04      P4  0.72774179 -0.620931643
## 31   1907P4Ob   19    07      P4  0.34677570 -0.860880613
## 11   1807P4Ob   18    07      P4 -2.30613418 -0.832022499
## 50   2009P4Ob   20    09      P4 -1.23777925 -0.190676456
## 43   2007P4Ob   20    07      P4 -0.18408763  0.365516922
## 18   1809P4Ob   18    09      P4  1.04468969  0.563959359
## 19   1809P8Ob   18    09      P8  0.18301642 -1.729139172
## 51   2009P8Ob   20    09      P8 -0.36527849 -0.566452149
## 12   1807P8Ob   18    07      P8 -0.17101645 -0.293822299
## 44   2007P8Ob   20    07      P8 -0.07366711 -0.253652622
## 25   1904P8Ob   19    04      P8  0.40903049 -0.220298734
## 7    1804P8Ob   18    04      P8  0.87169812 -0.420405215

Species fitting

vf <- envfit(Mol16S_NMDSmodel, Mol16S_wide, perm = 999)

spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)

# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
##                      NMDS1      NMDS2         Species Pvalues R_squared
## Lacuna vincta    0.5425525 -0.7113848   Lacuna vincta   0.001 0.3093587
## Littorina plena -0.8859512  0.2040507 Littorina plena   0.001 0.3194518
## Mya arenaria    -0.7962286 -0.4204621    Mya arenaria   0.003 0.3133538
Mol16S_Species_plot <- ggplot(Mol16S.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour =  station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
  geom_segment(data = spp.scrs,size=0.4,
               aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
               arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+ 
  geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
  annotate("text", label = "2D Stress: 0.135", x = -0.55, y = 0.8, size = 5, colour = "black")+
  geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
                  size = 3, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.16, "lines"), label.size = 0.05)
Mol16S_Species_plot

save plot

setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "Mol16S_NMDS_oblique.png", plot = Mol16S_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)

M28S

Data Setup

Remove non-zooplanton phyla

unique(M28S_counts$Phylum)
##  [1] "Arthropoda"        NA                  "Cnidaria"         
##  [4] "Dinophyceae__p"    "Raphidophyceae__p" "Bacillariophyta"  
##  [7] "Chordata"          "Ctenophora"        "Chytridiomycota"  
## [10] "Annelida"          "Streptophyta"      "Acantharea__p"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p','Raphidophyceae__p','Chytridiomycota','Acantharea__p')

# Remove those levels and drop them from the factor
M28S_counts <- droplevels(M28S_counts[!M28S_counts$Phylum %in% levels_to_remove,])

remove non-species rows

M28S_comp<- M28S_counts[complete.cases(M28S_counts), ]

convert to presence-absence

M28S_comp_pa<-M28S_comp %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

remove excess columns

M28S_comp_pa <- M28S_comp_pa[ -c(1:7) ]

wide to long format

M28S_long<-melt(M28S_comp_pa, id.vars=c("Species"))

add up multiple lines per station and convert back to presence-absence

M28S_long_sum <- M28S_long %>%
  group_by(Species,variable) %>%
  summarise(
    value = sum(value))

M28S_long_sum<-M28S_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

split code string up to extract metadata

M28S_long_sum$variable<-str_split_fixed(M28S_long_sum$variable,"_",4)

remove samples of non-interest

unique(M28S_long_sum$variable[,3])
##  [1] "P12"   "P22"   "P28"   "P38"   "P402"  "P4"    "P8"    "CA042" "P105" 
## [10] "P123"  "P132"  "P136"  "P381"  "P7"    ""
unique(M28S_long_sum$variable[,2])
##  [1] "1804"     "1806"     "1807"     "1809"     "1904"     "1907"    
##  [7] "1909"     "2007"     "2009"     "2010"     "Control1" "Control2"
#remove excess stations
levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
M28S_long_sum <- droplevels(M28S_long_sum[!M28S_long_sum$variable[,3] %in% levels_to_remove,])

split up replicate columns

unique(M28S_long_sum$variable[,4])
## [1] "Ob"  "Ve"  "ObB" "VeA" "ObA" "VeB"
M28S_long_sum <- M28S_long_sum %>%
  mutate(
    tow_type = substr(variable[,4], 1, 2),
    replicate = substr(variable[,4], 3, nchar(variable[,4]))
  )

split up year and month columns

M28S_long_sum <- M28S_long_sum %>%
  mutate(
    Year = substr(variable[,2], 1, 2),
    Month = substr(variable[,2], 3, 4)
  )

make station column

M28S_long_sum$station<-M28S_long_sum$variable[,3]

remove vertical samples

M28S_long_sum<-subset(M28S_long_sum,tow_type=="Ob")

merge replicates (and oblique and vertical tows)

colnames(M28S_long_sum)
## [1] "Species"   "variable"  "value"     "tow_type"  "replicate" "Year"     
## [7] "Month"     "station"
M28S_long_sum$sample<-paste(M28S_long_sum$Year,M28S_long_sum$Month,M28S_long_sum$station,M28S_long_sum$tow_type,sep="")

M28S_long_sum <- M28S_long_sum %>%
  group_by(Year,Month,station,sample,Species) %>%
  summarise(
    value = sum(value))
#convert back to presence-absence
M28S_long_sum<-M28S_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

back to wide format

M28S_wide<-dcast(M28S_long_sum, sample+Year+Month+station ~ Species )

remove any columns with all zeroes

target_cols <- 6:ncol(M28S_wide)

non_zero_cols <- target_cols[colSums(M28S_wide[, target_cols]) != 0]

M28S_wide <- M28S_wide[, c(1:5, non_zero_cols)]

remove environmental data

M28S_wide_simple<-M28S_wide[ , 5:31]

Make environmental columns categorical

# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")

# Convert the specified columns to factors
M28S_wide <- M28S_wide %>%
  mutate_at(vars(one_of(cols_to_factor)), as.factor)

PERMANOVA

dist<-vegdist(M28S_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=M28S_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist ~ station * Month, data = M28S_wide, permutations = 999, method = "bray")
##               Df SumOfSqs      R2      F Pr(>F)    
## station        6   2.3250 0.34311 5.7420  0.001 ***
## Month          3   0.7960 0.11748 3.9320  0.001 ***
## station:Month 13   1.6307 0.24064 1.8587  0.004 ** 
## Residual      30   2.0245 0.29877                  
## Total         52   6.7762 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

station comparisons

adonis.pair(vegdist(M28S_wide_simple),M28S_wide$station)
##     combination  SumsOfSqs    MeanSqs    F.Model         R2     P.value
## 1   P12 <-> P22 0.66804033 0.66804033  8.9961187 0.39120161 0.000999001
## 2   P12 <-> P28 0.38107987 0.38107987  3.9205066 0.21877209 0.003996004
## 3   P12 <-> P38 0.25618672 0.25618672  2.2707796 0.14870096 0.085914086
## 4    P12 <-> P4 0.14186670 0.14186670  1.1740844 0.08283318 0.329670330
## 5  P12 <-> P402 0.21512395 0.21512395  1.7660925 0.11960459 0.158841159
## 6    P12 <-> P8 0.39488422 0.39488422  4.0948720 0.22630014 0.004995005
## 7   P22 <-> P28 0.60906136 0.60906136 10.2443032 0.42254476 0.001998002
## 8   P22 <-> P38 0.46806990 0.46806990  6.4859162 0.33285149 0.000999001
## 9    P22 <-> P4 0.46209322 0.46209322  5.7631948 0.30715424 0.001998002
## 10 P22 <-> P402 1.13535161 1.13535161 13.9897184 0.51833510 0.000999001
## 11   P22 <-> P8 0.24452909 0.24452909  4.1667497 0.22936132 0.018981019
## 12  P28 <-> P38 0.07035161 0.07035161  0.7262107 0.05290686 0.586413586
## 13   P28 <-> P4 0.21260055 0.21260055  2.0269321 0.13488662 0.098901099
## 14 P28 <-> P402 0.55152214 0.55152214  5.2097258 0.28609578 0.003996004
## 15   P28 <-> P8 0.22539405 0.22539405  2.7612084 0.16473803 0.043956044
## 16   P38 <-> P4 0.16748934 0.16748934  1.3678523 0.10232401 0.228771229
## 17 P38 <-> P402 0.34334978 0.34334978  2.7800615 0.18809539 0.032967033
## 18   P38 <-> P8 0.17422272 0.17422272  1.8139129 0.12244657 0.119880120
## 19  P4 <-> P402 0.39829857 0.39829857  3.0131891 0.20070280 0.007992008
## 20    P4 <-> P8 0.12016979 0.12016979  1.1548021 0.08158377 0.357642358
## 21  P402 <-> P8 0.86765199 0.86765199  8.2604389 0.38853567 0.000999001
##    P.value.corrected
## 1        0.005244755
## 2        0.010489510
## 3        0.128871129
## 4        0.364372470
## 5        0.196215549
## 6        0.011655012
## 7        0.006993007
## 8        0.005244755
## 9        0.006993007
## 10       0.005244755
## 11       0.036236491
## 12       0.586413586
## 13       0.138461538
## 14       0.010489510
## 15       0.071005917
## 16       0.266899767
## 17       0.057692308
## 18       0.157342657
## 19       0.016783217
## 20       0.375524476
## 21       0.005244755

month comparisons

adonis.pair(vegdist(M28S_wide_simple),M28S_wide$Month)
##   combination   SumsOfSqs     MeanSqs   F.Model         R2    P.value
## 1   04 <-> 07 0.246317314 0.246317314 2.3020106 0.07126524 0.06893107
## 2   04 <-> 09 0.330966201 0.330966201 2.5518732 0.07839405 0.03896104
## 3   04 <-> 10 0.063778296 0.063778296 0.5790068 0.04263985 0.71328671
## 4   07 <-> 09 0.413323900 0.413323900 3.2054987 0.08176146 0.01298701
## 5   07 <-> 10 0.095928787 0.095928787 0.8349194 0.04209341 0.55844156
## 6   09 <-> 10 0.003497279 0.003497279 0.0232024 0.00121969 0.98701299
##   P.value.corrected
## 1        0.13786214
## 2        0.11688312
## 3        0.85594406
## 4        0.07792208
## 5        0.83766234
## 6        0.98701299

run NMDS

M28S_NMDSmodel <- metaMDS(M28S_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.206974 
## Run 1 stress 0.2290133 
## Run 2 stress 0.2248491 
## Run 3 stress 0.2438311 
## Run 4 stress 0.2371662 
## Run 5 stress 0.2304036 
## Run 6 stress 0.2226275 
## Run 7 stress 0.2317371 
## Run 8 stress 0.223941 
## Run 9 stress 0.2352705 
## Run 10 stress 0.2345652 
## Run 11 stress 0.2344381 
## Run 12 stress 0.2298158 
## Run 13 stress 0.2282033 
## Run 14 stress 0.2222865 
## Run 15 stress 0.229962 
## Run 16 stress 0.2291709 
## Run 17 stress 0.208272 
## Run 18 stress 0.2212705 
## Run 19 stress 0.2292452 
## Run 20 stress 0.2331123 
## Run 21 stress 0.2295625 
## Run 22 stress 0.2267182 
## Run 23 stress 0.2165566 
## Run 24 stress 0.2227241 
## Run 25 stress 0.2082718 
## Run 26 stress 0.2315231 
## Run 27 stress 0.2271305 
## Run 28 stress 0.2172864 
## Run 29 stress 0.2305348 
## Run 30 stress 0.2393786 
## Run 31 stress 0.2314914 
## Run 32 stress 0.2218763 
## Run 33 stress 0.2176189 
## Run 34 stress 0.2184809 
## Run 35 stress 0.2340489 
## Run 36 stress 0.2207368 
## Run 37 stress 0.2221219 
## Run 38 stress 0.2377905 
## Run 39 stress 0.2268368 
## Run 40 stress 0.2179231 
## Run 41 stress 0.2219736 
## Run 42 stress 0.2325035 
## Run 43 stress 0.2381708 
## Run 44 stress 0.2345359 
## Run 45 stress 0.216371 
## Run 46 stress 0.2326738 
## Run 47 stress 0.2240677 
## Run 48 stress 0.2300943 
## Run 49 stress 0.2267939 
## Run 50 stress 0.246557 
## Run 51 stress 0.2137034 
## Run 52 stress 0.2287888 
## Run 53 stress 0.2109735 
## Run 54 stress 0.2123302 
## Run 55 stress 0.227375 
## Run 56 stress 0.2154465 
## Run 57 stress 0.2302811 
## Run 58 stress 0.2389569 
## Run 59 stress 0.2293788 
## Run 60 stress 0.2193683 
## Run 61 stress 0.2335544 
## Run 62 stress 0.2263505 
## Run 63 stress 0.2257863 
## Run 64 stress 0.2195394 
## Run 65 stress 0.2411559 
## Run 66 stress 0.2217801 
## Run 67 stress 0.2343708 
## Run 68 stress 0.2232186 
## Run 69 stress 0.2301361 
## Run 70 stress 0.2165561 
## Run 71 stress 0.2267872 
## Run 72 stress 0.2232597 
## Run 73 stress 0.2314402 
## Run 74 stress 0.21562 
## Run 75 stress 0.244776 
## Run 76 stress 0.216563 
## Run 77 stress 0.2123383 
## Run 78 stress 0.206974 
## ... New best solution
## ... Procrustes: rmse 2.041027e-05  max resid 7.441322e-05 
## ... Similar to previous best
## *** Best solution repeated 1 times
M28S_NMDSmodel
## 
## Call:
## metaMDS(comm = M28S_wide_simple, distance = "bray", trymax = 999) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     M28S_wide_simple 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.206974 
## Stress type 1, weak ties
## Best solution was repeated 1 time in 78 tries
## The best solution was from try 78 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'M28S_wide_simple'

get datascores

M28S.data.scores<-scores(M28S_NMDSmodel,display="sites")
M28S.data.scores<- as.data.frame(M28S.data.scores)

add environmental variables back in

# Extract the first 5 columns from df1
first_5_columns <- M28S_wide[, 1:4]

# Combine the first 5 columns from df1 with df2
M28S.data.scores <- cbind(first_5_columns, M28S.data.scores)

plot NMDS

plot datascores

xx <- ggplot(M28S.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
xx

extract hulls

unique(M28S.data.scores$station)
## [1] P12  P22  P28  P38  P402 P4   P8  
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- M28S.data.scores[M28S.data.scores$station == "P12", ][chull(M28S.data.scores[M28S.data.scores$station ==                                                           "P12", c("NMDS1", "NMDS2")]), ]  
P22 <- M28S.data.scores[M28S.data.scores$station == "P22", ][chull(M28S.data.scores[M28S.data.scores$station ==                                                           "P22", c("NMDS1", "NMDS2")]), ]  
P28 <- M28S.data.scores[M28S.data.scores$station == "P28", ][chull(M28S.data.scores[M28S.data.scores$station ==                                                           "P28", c("NMDS1", "NMDS2")]), ]
P38 <- M28S.data.scores[M28S.data.scores$station == "P38", ][chull(M28S.data.scores[M28S.data.scores$station ==                                                           "P38", c("NMDS1", "NMDS2")]), ]  
P4 <- M28S.data.scores[M28S.data.scores$station == "P4", ][chull(M28S.data.scores[M28S.data.scores$station ==                                                           "P4", c("NMDS1", "NMDS2")]), ]  
P402 <- M28S.data.scores[M28S.data.scores$station == "P402", ][chull(M28S.data.scores[M28S.data.scores$station ==                                                           "P402", c("NMDS1", "NMDS2")]), ]  
P8 <- M28S.data.scores[M28S.data.scores$station == "P8", ][chull(M28S.data.scores[M28S.data.scores$station ==                                                           "P8", c("NMDS1", "NMDS2")]), ]  

get hull data

hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8)  #combine grp.a and grp.b
hull.data
##        sample Year Month station       NMDS1      NMDS2
## 1   1804P12Ob   18    04     P12 -0.40986184 -0.1290208
## 38  2007P12Ob   20    07     P12 -0.58699048 -0.1697800
## 20  1904P12Ob   19    04     P12 -0.35714594  0.3909733
## 45  2009P12Ob   20    09     P12  0.42777001  0.6532272
## 33  1909P12Ob   19    09     P12  0.27269307  0.2664957
## 21  1904P22Ob   19    04     P22  0.67389337 -0.1951536
## 2   1804P22Ob   18    04     P22  0.67547683 -0.2889490
## 52  2010P22Ob   20    10     P22  0.07946482 -0.1783487
## 27  1907P22Ob   19    07     P22  0.46294234  0.1074290
## 14  1809P22Ob   18    09     P22  0.66615882 -0.1359921
## 3   1804P28Ob   18    04     P28  0.38197209 -0.2929012
## 35  1909P28Ob   19    09     P28  0.18965078 -0.3667978
## 15  1809P28Ob   18    09     P28 -0.54731643 -0.5272428
## 53  2010P28Ob   20    10     P28 -0.09360909  0.6971442
## 29  1907P38Ob   19    07     P38  0.10813916 -0.3601671
## 16  1809P38Ob   18    09     P38 -0.93900042 -0.4892924
## 36  1909P38Ob   19    09     P38  0.34464840  0.4419093
## 22  1904P38Ob   19    04     P38  0.24033626  0.0120023
## 5  1804P402Ob   18    04    P402 -0.11423793 -0.3205777
## 30 1907P402Ob   19    07    P402 -0.71644022 -0.2605243
## 17 1809P402Ob   18    09    P402 -0.98235325  0.3815895
## 10 1807P402Ob   18    07    P402 -0.48645732  0.6791813
## 31   1907P4Ob   19    07      P4 -0.29777479 -0.5467845
## 43   2007P4Ob   20    07      P4 -0.38905531 -0.2722517
## 11   1807P4Ob   18    07      P4 -0.48641599  0.4505654
## 50   2009P4Ob   20    09      P4  0.30877591  0.6145633
## 18   1809P4Ob   18    09      P4  0.53827349  0.3163899
## 7    1804P8Ob   18    04      P8  0.55250013 -0.2150411
## 51   2009P8Ob   20    09      P8 -0.02262388 -0.6283247
## 19   1809P8Ob   18    09      P8  0.08526749  0.6703768

Species fitting

vf <- envfit(M28S_NMDSmodel, M28S_wide, perm = 999)

spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)

# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
##                            NMDS1       NMDS2               Species Pvalues
## Calanus finmarchicus   0.6908220  0.27153448  Calanus finmarchicus   0.001
## Calanus glacialis      0.7500000  0.07339109     Calanus glacialis   0.001
## Eucalanus bungii       0.6892136  0.01861204      Eucalanus bungii   0.001
## Metridia lucens        0.5611232 -0.11401179       Metridia lucens   0.001
## Paracalanus parvus    -0.6166596 -0.41149828    Paracalanus parvus   0.001
## Paraeuchaeta elongata  0.0460038  0.61022418 Paraeuchaeta elongata   0.001
## Paraeuchaeta russelli  0.2439038  0.55168908 Paraeuchaeta russelli   0.001
## Pseudocalanus minutus  0.6065093 -0.44833970 Pseudocalanus minutus   0.001
## Pseudocalanus newmani  0.3517803 -0.58327905 Pseudocalanus newmani   0.001
##                       R_squared
## Calanus finmarchicus  0.5215867
## Calanus glacialis     0.5376047
## Eucalanus bungii      0.4500139
## Metridia lucens       0.3103755
## Paracalanus parvus    0.5202935
## Paraeuchaeta elongata 0.3545209
## Paraeuchaeta russelli 0.3444483
## Pseudocalanus minutus 0.5385284
## Pseudocalanus newmani 0.4392237
M28S_Species_plot <- ggplot(M28S.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour =  station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
  geom_segment(data = spp.scrs,size=0.4,
               aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
               arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+ 
  geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
  annotate("text", label = "2D Stress: 0.205", x = 0.6, y = 0.8, size = 5, colour = "black")+
  geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
                  size = 3, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.16, "lines"), label.size = 0.05)
M28S_Species_plot

save plot

setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "M28S_NMDS_oblique.png", plot = M28S_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)

Machida18S

Data Setup

Remove non-zooplanton phyla

unique(Machida18S_counts$Phylum)
##  [1] "Arthropoda"          "Cnidaria"            "Bacillariophyta"    
##  [4] "Mollusca"            "Ctenophora"          "Chaetognatha"       
##  [7] NA                    "Annelida"            "Dinophyceae__p"     
## [10] "Nemertea"            "Chordata"            "Ciliophora"         
## [13] "Oomycota"            "Acantharea__p"       "Bryozoa"            
## [16] "Echinodermata"       "Apicomplexa"         "Platyhelminthes"    
## [19] "Chlorophyta"         "Synurophyceae__p"    "Endomyxa"           
## [22] "Cercozoa"            "Streptophyta"        "Ascomycota"         
## [25] "Blastocladiomycota"  "Bigyra__p"           "Chytridiomycota"    
## [28] "Nematoda"            "Rotifera"            "Phoronida"          
## [31] "Polycystinea__p"     "Choanoflagellata__p" "Cryptophyceae__p"   
## [34] "Phaeophyceae__p"     "Rhodophyta"          "Raphidophyceae__p"  
## [37] "Basidiomycota"       "Pirsoniales__c__p"   "Porifera"           
## [40] "Ichthyosporea__p"    "Colponemida__c__p"   "Dictyochophyceae__p"
## [43] "Entoprocta"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p','Ciliophora','Acantharea__p','Apicomplexa','Synurophyceae__p','Endomyxa','Cercozoa','Bigyra__p','Chytridiomycota','Polycystinea__p','Choanoflagellata__p','Raphidophyceae__p','Pirsoniales__c__p','Colponemida__c__p','Ichthyosporea__p')

# Remove those levels and drop them from the factor
Machida18S_counts <- droplevels(Machida18S_counts[!Machida18S_counts$Phylum %in% levels_to_remove,])

remove non-species rows

Machida18S_comp<- Machida18S_counts[complete.cases(Machida18S_counts), ]

convert to presence-absence

Machida18S_comp_pa<-Machida18S_comp %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

remove excess columns

Machida18S_comp_pa <- Machida18S_comp_pa[ -c(1:7) ]

wide to long format

Machida18S_long<-melt(Machida18S_comp_pa, id.vars=c("Species"))

add up multiple lines per station and convert back to presence-absence

Machida18S_long_sum <- Machida18S_long %>%
  group_by(Species,variable) %>%
  summarise(
    value = sum(value))

Machida18S_long_sum<-Machida18S_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

split code string up to extract metadata

Machida18S_long_sum$variable<-str_split_fixed(Machida18S_long_sum$variable,"_",4)

remove samples of non-interest

unique(Machida18S_long_sum$variable[,3])
##  [1] "P12"   "P22"   "P28"   "P38"   "P402"  "P4"    "P8"    "CA042" "P105" 
## [10] "P123"  "P132"  "P136"  "P381"  "P7"    ""
unique(Machida18S_long_sum$variable[,2])
##  [1] "1804"     "1806"     "1807"     "1809"     "1904"     "1907"    
##  [7] "1909"     "2007"     "2009"     "2010"     "Control1"
#remove excess stations
levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
Machida18S_long_sum <- droplevels(Machida18S_long_sum[!Machida18S_long_sum$variable[,3] %in% levels_to_remove,])

split up replicate columns

unique(Machida18S_long_sum$variable[,4])
## [1] "Ob"  "Ve"  "ObB" "VeA" "ObA" "VeB"
Machida18S_long_sum <- Machida18S_long_sum %>%
  mutate(
    tow_type = substr(variable[,4], 1, 2),
    replicate = substr(variable[,4], 3, nchar(variable[,4]))
  )

split up year and month columns

Machida18S_long_sum <- Machida18S_long_sum %>%
  mutate(
    Year = substr(variable[,2], 1, 2),
    Month = substr(variable[,2], 3, 4)
  )

make station column

Machida18S_long_sum$station<-Machida18S_long_sum$variable[,3]

remove vertical samples

Machida18S_long_sum<-subset(Machida18S_long_sum,tow_type=="Ob")

merge replicates (and oblique and vertical tows)

colnames(Machida18S_long_sum)
## [1] "Species"   "variable"  "value"     "tow_type"  "replicate" "Year"     
## [7] "Month"     "station"
Machida18S_long_sum$sample<-paste(Machida18S_long_sum$Year,Machida18S_long_sum$Month,Machida18S_long_sum$station,Machida18S_long_sum$tow_type,sep="")

Machida18S_long_sum <- Machida18S_long_sum %>%
  group_by(Year,Month,station,sample,Species) %>%
  summarise(
    value = sum(value))
#convert back to presence-absence
Machida18S_long_sum<-Machida18S_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

back to wide format

Machida18S_wide<-dcast(Machida18S_long_sum, sample+Year+Month+station ~ Species )

remove any columns with all zeroes

target_cols <- 6:ncol(Machida18S_wide)

non_zero_cols <- target_cols[colSums(Machida18S_wide[, target_cols]) != 0]

Machida18S_wide <- Machida18S_wide[, c(1:5, non_zero_cols)]

remove environmental data

Machida18S_wide_simple<-Machida18S_wide[ , 5:110]

Make environmental columns categorical

# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")

# Convert the specified columns to factors
Machida18S_wide <- Machida18S_wide %>%
  mutate_at(vars(one_of(cols_to_factor)), as.factor)

PERMANOVA

dist<-vegdist(Machida18S_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=Machida18S_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist ~ station * Month, data = Machida18S_wide, permutations = 999, method = "bray")
##               Df SumOfSqs      R2      F Pr(>F)    
## station        6   1.9876 0.27114 4.0876  0.001 ***
## Month          3   1.0171 0.13875 4.1834  0.001 ***
## station:Month 13   1.8135 0.24738 1.7213  0.001 ***
## Residual      31   2.5124 0.34272                  
## Total         53   7.3306 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

station comparisons

adonis.pair(vegdist(Machida18S_wide_simple),Machida18S_wide$station)
##     combination SumsOfSqs   MeanSqs  F.Model        R2     P.value
## 1   P12 <-> P22 0.4050801 0.4050801 4.139886 0.2162963 0.002997003
## 2   P12 <-> P28 0.2007739 0.2007739 1.892205 0.1190650 0.094905095
## 3   P12 <-> P38 0.1829881 0.1829881 1.665385 0.1135589 0.107892108
## 4    P12 <-> P4 0.3075001 0.3075001 3.520660 0.2131065 0.014985015
## 5  P12 <-> P402 0.2593743 0.2593743 2.220451 0.1458860 0.090909091
## 6    P12 <-> P8 0.3705372 0.3705372 2.975489 0.1752815 0.007992008
## 7   P22 <-> P28 0.5258883 0.5258883 4.996480 0.2498680 0.001998002
## 8   P22 <-> P38 0.2543377 0.2543377 2.339965 0.1432050 0.009990010
## 9    P22 <-> P4 0.3209403 0.3209403 3.656737 0.2071015 0.004995005
## 10 P22 <-> P402 0.4478436 0.4478436 3.889834 0.2174326 0.000999001
## 11   P22 <-> P8 0.2003051 0.2003051 1.635844 0.0983325 0.113886114
## 12  P28 <-> P38 0.2074972 0.2074972 1.752214 0.1187763 0.106893107
## 13   P28 <-> P4 0.5734570 0.5734570 5.980720 0.3150945 0.001998002
## 14 P28 <-> P402 0.4835637 0.4835637 3.857579 0.2288335 0.006993007
## 15   P28 <-> P8 0.4488371 0.4488371 3.388413 0.1948662 0.010989011
## 16   P38 <-> P4 0.2629625 0.2629625 2.653015 0.1810559 0.008991009
## 17 P38 <-> P402 0.2718403 0.2718403 2.074418 0.1473893 0.053946054
## 18   P38 <-> P8 0.2823920 0.2823920 2.042451 0.1357791 0.054945055
## 19  P4 <-> P402 0.1970624 0.1970624 1.848089 0.1334544 0.065934066
## 20    P4 <-> P8 0.3176350 0.3176350 2.744726 0.1743267 0.017982018
## 21  P402 <-> P8 0.3964518 0.3964518 2.730469 0.1735784 0.003996004
##    P.value.corrected
## 1         0.01573427
## 2         0.11072261
## 3         0.11328671
## 4         0.02622378
## 5         0.11072261
## 6         0.02097902
## 7         0.01398601
## 8         0.02097902
## 9         0.01748252
## 10        0.01398601
## 11        0.11388611
## 12        0.11328671
## 13        0.01398601
## 14        0.02097902
## 15        0.02097902
## 16        0.02097902
## 17        0.07692308
## 18        0.07692308
## 19        0.08653846
## 20        0.02904788
## 21        0.01678322

month comparisons

adonis.pair(vegdist(Machida18S_wide_simple),Machida18S_wide$Month)
##   combination SumsOfSqs   MeanSqs   F.Model         R2     P.value
## 1   04 <-> 07 0.7291627 0.7291627 6.0598509 0.16351525 0.000999001
## 2   04 <-> 09 0.6448790 0.6448790 5.1233906 0.14586834 0.000999001
## 3   04 <-> 10 0.2278935 0.2278935 1.6661575 0.11360559 0.102897103
## 4   07 <-> 09 0.1587687 0.1587687 1.3215272 0.03448524 0.215784216
## 5   07 <-> 10 0.1520046 0.1520046 1.2422648 0.05848081 0.271728272
## 6   09 <-> 10 0.1019444 0.1019444 0.7768989 0.03928315 0.695304695
##   P.value.corrected
## 1       0.002997003
## 2       0.002997003
## 3       0.205794206
## 4       0.323676324
## 5       0.326073926
## 6       0.695304695

run NMDS

Machida18S_NMDSmodel <- metaMDS(Machida18S_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.2180912 
## Run 1 stress 0.2327872 
## Run 2 stress 0.2423508 
## Run 3 stress 0.2205055 
## Run 4 stress 0.2278572 
## Run 5 stress 0.2190059 
## Run 6 stress 0.2205049 
## Run 7 stress 0.2192346 
## Run 8 stress 0.23612 
## Run 9 stress 0.2324939 
## Run 10 stress 0.2192886 
## Run 11 stress 0.2409283 
## Run 12 stress 0.2246649 
## Run 13 stress 0.2192514 
## Run 14 stress 0.2308205 
## Run 15 stress 0.2263327 
## Run 16 stress 0.2309202 
## Run 17 stress 0.2318796 
## Run 18 stress 0.222232 
## Run 19 stress 0.2361207 
## Run 20 stress 0.2193056 
## Run 21 stress 0.2542189 
## Run 22 stress 0.2259888 
## Run 23 stress 0.2184218 
## ... Procrustes: rmse 0.01488169  max resid 0.06864917 
## Run 24 stress 0.243502 
## Run 25 stress 0.2195005 
## Run 26 stress 0.2235743 
## Run 27 stress 0.230459 
## Run 28 stress 0.2257394 
## Run 29 stress 0.2183715 
## ... Procrustes: rmse 0.04204822  max resid 0.1824824 
## Run 30 stress 0.2180912 
## ... New best solution
## ... Procrustes: rmse 6.934386e-06  max resid 2.755266e-05 
## ... Similar to previous best
## *** Best solution repeated 1 times
Machida18S_NMDSmodel
## 
## Call:
## metaMDS(comm = Machida18S_wide_simple, distance = "bray", trymax = 999) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     Machida18S_wide_simple 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.2180912 
## Stress type 1, weak ties
## Best solution was repeated 1 time in 30 tries
## The best solution was from try 30 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'Machida18S_wide_simple'

get datascores

Machida18S.data.scores<-scores(Machida18S_NMDSmodel,display="sites")
Machida18S.data.scores<- as.data.frame(Machida18S.data.scores)

add environmental variables back in

# Extract the first 5 columns from df1
first_5_columns <- Machida18S_wide[, 1:4]

# Combine the first 5 columns from df1 with df2
Machida18S.data.scores <- cbind(first_5_columns, Machida18S.data.scores)

plot NMDS

plot datascores

xx <- ggplot(Machida18S.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
xx

extract hulls

unique(Machida18S.data.scores$station)
## [1] P12  P22  P28  P38  P402 P4   P8  
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- Machida18S.data.scores[Machida18S.data.scores$station == "P12", ][chull(Machida18S.data.scores[Machida18S.data.scores$station ==                                                           "P12", c("NMDS1", "NMDS2")]), ]  
P22 <- Machida18S.data.scores[Machida18S.data.scores$station == "P22", ][chull(Machida18S.data.scores[Machida18S.data.scores$station ==                                                           "P22", c("NMDS1", "NMDS2")]), ]  
P28 <- Machida18S.data.scores[Machida18S.data.scores$station == "P28", ][chull(Machida18S.data.scores[Machida18S.data.scores$station ==                                                           "P28", c("NMDS1", "NMDS2")]), ]
P38 <- Machida18S.data.scores[Machida18S.data.scores$station == "P38", ][chull(Machida18S.data.scores[Machida18S.data.scores$station ==                                                           "P38", c("NMDS1", "NMDS2")]), ]  
P4 <- Machida18S.data.scores[Machida18S.data.scores$station == "P4", ][chull(Machida18S.data.scores[Machida18S.data.scores$station ==                                                           "P4", c("NMDS1", "NMDS2")]), ]  
P402 <- Machida18S.data.scores[Machida18S.data.scores$station == "P402", ][chull(Machida18S.data.scores[Machida18S.data.scores$station ==                                                           "P402", c("NMDS1", "NMDS2")]), ]  
P8 <- Machida18S.data.scores[Machida18S.data.scores$station == "P8", ][chull(Machida18S.data.scores[Machida18S.data.scores$station ==                                                           "P8", c("NMDS1", "NMDS2")]), ]  

get hull data

hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8)  #combine grp.a and grp.b
hull.data
##        sample Year Month station        NMDS1        NMDS2
## 34  1909P12Ob   19    09     P12  0.396359675  0.125076523
## 46  2009P12Ob   20    09     P12  0.254394872  0.077670176
## 21  1904P12Ob   19    04     P12 -0.245286359  0.003780224
## 1   1804P12Ob   18    04     P12 -0.532243523  0.078841202
## 8   1807P12Ob   18    07     P12  0.345500684  0.317835626
## 14  1809P12Ob   18    09     P12  0.423600451  0.162535854
## 40  2007P22Ob   20    07     P22  0.190290701 -0.212460323
## 22  1904P22Ob   19    04     P22 -0.029997322 -0.326957539
## 53  2010P22Ob   20    10     P22 -0.282008807 -0.366104613
## 9   1807P22Ob   18    07     P22 -0.613555424 -0.266440626
## 35  1909P22Ob   19    09     P22 -0.276765254  0.040710164
## 15  1809P22Ob   18    09     P22  0.005575514  0.047474321
## 28  1907P22Ob   19    07     P22  0.175423251 -0.068213265
## 54  2010P28Ob   20    10     P28  0.600577680 -0.345004985
## 3   1804P28Ob   18    04     P28 -0.555670306 -0.092517746
## 29  1907P28Ob   19    07     P28  0.324793574  0.225025384
## 16  1809P28Ob   18    09     P28  0.621521573  0.088959238
## 49  2009P38Ob   20    09     P38  0.332494764 -0.210046489
## 23  1904P38Ob   19    04     P38 -0.058909988 -0.385986346
## 4   1804P38Ob   18    04     P38 -0.578203417 -0.404042151
## 17  1809P38Ob   18    09     P38  0.082806686  0.234877848
## 30  1907P38Ob   19    07     P38  0.240963450  0.272320799
## 42  2007P38Ob   20    07     P38  0.357796622  0.067388329
## 50 2009P402Ob   20    09    P402 -0.147305217 -0.246814584
## 5  1804P402Ob   18    04    P402 -0.519839435  0.123072634
## 24 1904P402Ob   19    04    P402 -0.615468218  0.340963300
## 43 2007P402Ob   20    07    P402 -0.235711874  0.677489266
## 18 1809P402Ob   18    09    P402  0.564123705  0.245705800
## 6    1804P4Ob   18    04      P4  0.071300372 -0.423341177
## 12   1807P4Ob   18    07      P4 -0.341178655  0.219526125
## 19   1809P4Ob   18    09      P4 -0.193105990  0.287536229
## 44   2007P4Ob   20    07      P4 -0.141306727  0.200364184
## 45   2007P8Ob   20    07      P8  0.384467069 -0.469692110
## 52   2009P8Ob   20    09      P8 -0.007901423 -0.550965596
## 26   1904P8Ob   19    04      P8 -0.343502926 -0.461922624
## 7    1804P8Ob   18    04      P8 -0.884678930 -0.001719643
## 20   1809P8Ob   18    09      P8 -0.328277064  0.457654631
## 33   1907P8Ob   19    07      P8  0.399855511 -0.118875374

Species fitting

vf <- envfit(Machida18S_NMDSmodel, Machida18S_wide, perm = 999)

spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)

# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
##                                 NMDS1       NMDS2                   Species
## Clytia gracilis           -0.51409831 -0.73203689           Clytia gracilis
## Diphyes chamissonis       -0.77621015  0.32421305       Diphyes chamissonis
## Eutonina indicans         -0.53380883 -0.55443704         Eutonina indicans
## Halopsis ocellata         -0.64111045 -0.46727610         Halopsis ocellata
## Leuckartiara octona       -0.67857013  0.68240750       Leuckartiara octona
## Limnocalanus macrurus     -0.08241382 -0.71514202     Limnocalanus macrurus
## Membranipora membranacea  -0.78344846 -0.01188514  Membranipora membranacea
## Mesochaetopterus taylori  -0.61857557  0.39637049  Mesochaetopterus taylori
## Mitrella bicincta         -0.71333849 -0.18729172         Mitrella bicincta
## Neotrypaea californiensis -0.09990835  0.75000000 Neotrypaea californiensis
## Oithona atlantica         -0.30383367  0.64004812         Oithona atlantica
## Phoronopsis harmeri       -0.62069482  0.58122300       Phoronopsis harmeri
##                           Pvalues R_squared
## Clytia gracilis             0.001 0.4932201
## Diphyes chamissonis         0.001 0.4361678
## Eutonina indicans           0.001 0.3651202
## Halopsis ocellata           0.001 0.3879373
## Leuckartiara octona         0.001 0.5708621
## Limnocalanus macrurus       0.001 0.3194259
## Membranipora membranacea    0.001 0.3784222
## Mesochaetopterus taylori    0.001 0.3326938
## Mitrella bicincta           0.001 0.3352732
## Neotrypaea californiensis   0.001 0.3528721
## Oithona atlantica           0.001 0.3094134
## Phoronopsis harmeri         0.001 0.4457009
Machida18S_Species_plot <- ggplot(Machida18S.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour =  station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
  geom_segment(data = spp.scrs,size=0.4,
               aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
               arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+ 
  geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
  annotate("text", label = "2D Stress: 0.218", x =-0.6, y = 0.8, size = 5, colour = "black")+
  geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
                  size = 3, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.16, "lines"), label.size = 0.05)
Machida18S_Species_plot

save plot

setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "Machida18S_NMDS_oblique.png", plot = Machida18S_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)

Fishcytb

Data Setup

Remove non-zooplanton phyla

unique(Fishcytb_counts$Phylum)
## [1] NA               "Chordata"       "Arthropoda"     "Mollusca"      
## [5] "Annelida"       "Echinodermata"  "Pseudomonadota"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p','Pseudomonadota')

# Remove those levels and drop them from the factor
Fishcytb_counts <- droplevels(Fishcytb_counts[!Fishcytb_counts$Phylum %in% levels_to_remove,])

subset to fish only

Fishcytb_counts<-subset(Fishcytb_counts,Class=="Actinopteri")

remove non-species rows

Fishcytb_comp<- Fishcytb_counts[complete.cases(Fishcytb_counts), ]

convert to presence-absence

Fishcytb_comp_pa<-Fishcytb_comp %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

remove excess columns

Fishcytb_comp_pa <- Fishcytb_comp_pa[ -c(1:7) ]

wide to long format

Fishcytb_long<-melt(Fishcytb_comp_pa, id.vars=c("Species"))

add up multiple lines per station and convert back to presence-absence

Fishcytb_long_sum <- Fishcytb_long %>%
  group_by(Species,variable) %>%
  summarise(
    value = sum(value))

Fishcytb_long_sum<-Fishcytb_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

split code string up to extract metadata

Fishcytb_long_sum$variable<-str_split_fixed(Fishcytb_long_sum$variable,"_",4)

remove samples of non-interest

unique(Fishcytb_long_sum$variable[,3])
##  [1] "P12"   "P22"   "P28"   "P38"   "P402"  "P4"    "P8"    "CA042" "P105" 
## [10] "P123"  "P132"  "P136"  "P7"    ""
unique(Fishcytb_long_sum$variable[,2])
##  [1] "1804"     "1806"     "1807"     "1809"     "1904"     "1907"    
##  [7] "1909"     "2007"     "2009"     "2010"     "Control1"
#remove excess stations
levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
Fishcytb_long_sum <- droplevels(Fishcytb_long_sum[!Fishcytb_long_sum$variable[,3] %in% levels_to_remove,])

split up replicate columns

unique(Fishcytb_long_sum$variable[,4])
## [1] "Ob"  "VeA" "Ve"  "ObB" "ObA" "VeB"
Fishcytb_long_sum <- Fishcytb_long_sum %>%
  mutate(
    tow_type = substr(variable[,4], 1, 2),
    replicate = substr(variable[,4], 3, nchar(variable[,4]))
  )

split up year and month columns

Fishcytb_long_sum <- Fishcytb_long_sum %>%
  mutate(
    Year = substr(variable[,2], 1, 2),
    Month = substr(variable[,2], 3, 4)
  )

make station column

Fishcytb_long_sum$station<-Fishcytb_long_sum$variable[,3]

remove vertical samples

Fishcytb_long_sum<-subset(Fishcytb_long_sum,tow_type=="Ob")

merge replicates

colnames(Fishcytb_long_sum)
## [1] "Species"   "variable"  "value"     "tow_type"  "replicate" "Year"     
## [7] "Month"     "station"
Fishcytb_long_sum$sample<-paste(Fishcytb_long_sum$Year,Fishcytb_long_sum$Month,Fishcytb_long_sum$station,Fishcytb_long_sum$tow_type,sep="")

Fishcytb_long_sum <- Fishcytb_long_sum %>%
  group_by(Year,Month,station,sample,Species) %>%
  summarise(
    value = sum(value))
#convert back to presence-absence
Fishcytb_long_sum<-Fishcytb_long_sum %>%
  mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))

back to wide format

Fishcytb_wide<-dcast(Fishcytb_long_sum, sample+Year+Month+station ~ Species )

remove any columns with all zeroes

target_cols <- 6:ncol(Fishcytb_wide)

non_zero_cols <- target_cols[colSums(Fishcytb_wide[, target_cols]) != 0]

Fishcytb_wide <- Fishcytb_wide[, c(1:5, non_zero_cols)]

remove rows with all zeroes

row_sums <- rowSums(Fishcytb_wide[, 5:32])

# Create a logical vector to identify rows where the sum is not zero
non_zero_rows <- row_sums != 0

# Subset the data frame to keep only rows with a non-zero sum
Fishcytb_wide <- Fishcytb_wide[non_zero_rows, ]

remove environmental data

Fishcytb_wide_simple<-Fishcytb_wide[ , 5:32]

Make environmental columns categorical

# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")

# Convert the specified columns to factors
Fishcytb_wide <- Fishcytb_wide %>%
  mutate_at(vars(one_of(cols_to_factor)), as.factor)

PERMANOVA

dist<-vegdist(Fishcytb_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=Fishcytb_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = dist ~ station * Month, data = Fishcytb_wide, permutations = 999, method = "bray")
##               Df SumOfSqs      R2      F Pr(>F)    
## station        6   3.3596 0.23778 2.8840  0.001 ***
## Month          3   2.9107 0.20601 4.9973  0.001 ***
## station:Month 13   3.5873 0.25390 1.4213  0.030 *  
## Residual      22   4.2714 0.30231                  
## Total         44  14.1289 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

station comparisons

adonis.pair(vegdist(Fishcytb_wide_simple),Fishcytb_wide$station)
##     combination SumsOfSqs   MeanSqs   F.Model         R2     P.value
## 1   P12 <-> P22 1.2570176 1.2570176 4.4154240 0.26898020 0.005994006
## 2   P12 <-> P28 0.5265152 0.5265152 1.3982861 0.13447275 0.175824176
## 3   P12 <-> P38 0.8039701 0.8039701 2.4763880 0.18375755 0.059940060
## 4    P12 <-> P4 0.9286159 0.9286159 3.7590368 0.27320494 0.021978022
## 5  P12 <-> P402 0.6744163 0.6744163 2.7202768 0.19826691 0.037962038
## 6    P12 <-> P8 1.1848082 1.1848082 4.3954958 0.30533827 0.004995005
## 7   P22 <-> P28 0.4252049 0.4252049 1.2291927 0.10051299 0.321678322
## 8   P22 <-> P38 0.4431602 0.4431602 1.4448029 0.10002233 0.190809191
## 9    P22 <-> P4 0.3801767 0.3801767 1.5804422 0.11637634 0.144855145
## 10 P22 <-> P402 0.3797361 0.3797361 1.5704575 0.10778368 0.189810190
## 11   P22 <-> P8 0.7501007 0.7501007 2.8926427 0.19423300 0.028971029
## 12  P28 <-> P38 0.3269048 0.3269048 0.8254992 0.07625507 0.608391608
## 13   P28 <-> P4 0.5022366 0.5022366 1.5808821 0.14940929 0.110889111
## 14 P28 <-> P402 0.3671006 0.3671006 1.1781033 0.10539385 0.322677323
## 15   P28 <-> P8 0.2708882 0.2708882 0.7904289 0.08073486 0.681318681
## 16   P38 <-> P4 0.3747189 0.3747189 1.3551981 0.10968647 0.237762238
## 17 P38 <-> P402 0.1571306 0.1571306 0.5716717 0.04547300 0.751248751
## 18   P38 <-> P8 0.3378666 0.3378666 1.1377013 0.09373285 0.344655345
## 19  P4 <-> P402 0.3196842 0.3196842 1.6002412 0.12700084 0.192807193
## 20    P4 <-> P8 0.7789558 0.7789558 3.5965172 0.26451753 0.005994006
## 21  P402 <-> P8 0.5935853 0.5935853 2.6951672 0.19679695 0.027972028
##    P.value.corrected
## 1         0.04195804
## 2         0.28921079
## 3         0.15734266
## 4         0.10139860
## 5         0.11388611
## 6         0.04195804
## 7         0.39860140
## 8         0.28921079
## 9         0.28921079
## 10        0.28921079
## 11        0.10139860
## 12        0.67243283
## 13        0.25874126
## 14        0.39860140
## 15        0.71538462
## 16        0.33286713
## 17        0.75124875
## 18        0.40209790
## 19        0.28921079
## 20        0.04195804
## 21        0.10139860

month comparisons

adonis.pair(vegdist(Fishcytb_wide_simple),Fishcytb_wide$Year)
##   combination SumsOfSqs   MeanSqs  F.Model         R2   P.value
## 1   18 <-> 19 0.3755977 0.3755977 1.298178 0.04587497 0.2447552
## 2   18 <-> 20 0.4379156 0.4379156 1.347149 0.04439128 0.2537463
## 3   19 <-> 20 0.5061051 0.5061051 1.530874 0.05183978 0.1598402
##   P.value.corrected
## 1         0.2537463
## 2         0.2537463
## 3         0.2537463

run NMDS

Fishcytb_NMDSmodel <- metaMDS(Fishcytb_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.1583673 
## Run 1 stress 0.1666955 
## Run 2 stress 0.1766177 
## Run 3 stress 0.1580672 
## ... New best solution
## ... Procrustes: rmse 0.1102832  max resid 0.3243578 
## Run 4 stress 0.1564601 
## ... New best solution
## ... Procrustes: rmse 0.1066288  max resid 0.3252197 
## Run 5 stress 0.1579185 
## Run 6 stress 0.1560572 
## ... New best solution
## ... Procrustes: rmse 0.09080185  max resid 0.3545808 
## Run 7 stress 0.1605663 
## Run 8 stress 0.1601261 
## Run 9 stress 0.1496531 
## ... New best solution
## ... Procrustes: rmse 0.1091031  max resid 0.3172181 
## Run 10 stress 0.1484116 
## ... New best solution
## ... Procrustes: rmse 0.09294735  max resid 0.3427064 
## Run 11 stress 0.1588653 
## Run 12 stress 0.1622118 
## Run 13 stress 0.1566133 
## Run 14 stress 0.1554879 
## Run 15 stress 0.1536969 
## Run 16 stress 0.1580743 
## Run 17 stress 0.1596336 
## Run 18 stress 0.1545446 
## Run 19 stress 0.154086 
## Run 20 stress 0.156618 
## Run 21 stress 0.161681 
## Run 22 stress 0.1581779 
## Run 23 stress 0.169016 
## Run 24 stress 0.158542 
## Run 25 stress 0.1636469 
## Run 26 stress 0.1496268 
## Run 27 stress 0.1545171 
## Run 28 stress 0.1502626 
## Run 29 stress 0.1615822 
## Run 30 stress 0.1527706 
## Run 31 stress 0.1540708 
## Run 32 stress 0.1850368 
## Run 33 stress 0.1670124 
## Run 34 stress 0.1653676 
## Run 35 stress 0.1503019 
## Run 36 stress 0.1536123 
## Run 37 stress 0.1494774 
## Run 38 stress 0.1497016 
## Run 39 stress 0.1592069 
## Run 40 stress 0.1643382 
## Run 41 stress 0.1584598 
## Run 42 stress 0.1516966 
## Run 43 stress 0.1578278 
## Run 44 stress 0.1631824 
## Run 45 stress 0.1701022 
## Run 46 stress 0.1524939 
## Run 47 stress 0.1602812 
## Run 48 stress 0.1609328 
## Run 49 stress 0.156683 
## Run 50 stress 0.148843 
## ... Procrustes: rmse 0.06957941  max resid 0.3816475 
## Run 51 stress 0.1546622 
## Run 52 stress 0.1639194 
## Run 53 stress 0.1570247 
## Run 54 stress 0.1541572 
## Run 55 stress 0.1582515 
## Run 56 stress 0.1581801 
## Run 57 stress 0.158738 
## Run 58 stress 0.1509434 
## Run 59 stress 0.1491922 
## Run 60 stress 0.1597418 
## Run 61 stress 0.1553132 
## Run 62 stress 0.1616525 
## Run 63 stress 0.1577165 
## Run 64 stress 0.1664544 
## Run 65 stress 0.1486091 
## ... Procrustes: rmse 0.02503328  max resid 0.1078624 
## Run 66 stress 0.1605558 
## Run 67 stress 0.1487986 
## ... Procrustes: rmse 0.06278361  max resid 0.3175811 
## Run 68 stress 0.1565008 
## Run 69 stress 0.1501814 
## Run 70 stress 0.1579488 
## Run 71 stress 0.1500097 
## Run 72 stress 0.1520563 
## Run 73 stress 0.1621642 
## Run 74 stress 0.1578314 
## Run 75 stress 0.1554758 
## Run 76 stress 0.1541767 
## Run 77 stress 0.1525381 
## Run 78 stress 0.1607285 
## Run 79 stress 0.1611361 
## Run 80 stress 0.1676238 
## Run 81 stress 0.1503132 
## Run 82 stress 0.1480305 
## ... New best solution
## ... Procrustes: rmse 0.01919301  max resid 0.1046834 
## Run 83 stress 0.1662694 
## Run 84 stress 0.1517157 
## Run 85 stress 0.1480314 
## ... Procrustes: rmse 0.0003021542  max resid 0.001639335 
## ... Similar to previous best
## *** Best solution repeated 1 times
Fishcytb_NMDSmodel
## 
## Call:
## metaMDS(comm = Fishcytb_wide_simple, distance = "bray", trymax = 999) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     Fishcytb_wide_simple 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.1480305 
## Stress type 1, weak ties
## Best solution was repeated 1 time in 85 tries
## The best solution was from try 82 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'Fishcytb_wide_simple'

get datascores

Fishcytb.data.scores<-scores(Fishcytb_NMDSmodel,display="sites")
Fishcytb.data.scores<- as.data.frame(Fishcytb.data.scores)

add environmental variables back in

# Extract the first 5 columns from df1
first_5_columns <- Fishcytb_wide[, 1:4]

# Combine the first 5 columns from df1 with df2
Fishcytb.data.scores <- cbind(first_5_columns, Fishcytb.data.scores)

plot NMDS

plot datascores

xx <- ggplot(Fishcytb.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
  
xx

for the plot I had to zoom way in, because there were 3 outliers. I think there isn’t enough data for nmds here

extract hulls

unique(Fishcytb.data.scores$station)
## [1] P12  P22  P28  P38  P402 P4   P8  
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P12", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station ==                                                           "P12", c("NMDS1", "NMDS2")]), ]  
P22 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P22", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station ==                                                           "P22", c("NMDS1", "NMDS2")]), ]  
P28 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P28", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station ==                                                           "P28", c("NMDS1", "NMDS2")]), ]
P38 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P38", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station ==                                                           "P38", c("NMDS1", "NMDS2")]), ]  
P4 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P4", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station ==                                                           "P4", c("NMDS1", "NMDS2")]), ]  
P402 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P402", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station ==                                                           "P402", c("NMDS1", "NMDS2")]), ]  
P8 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P8", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station ==                                                           "P8", c("NMDS1", "NMDS2")]), ]  

get hull data

hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8)  #combine grp.a and grp.b
hull.data
##        sample Year Month station       NMDS1       NMDS2
## 26  1907P12Ob   19    07     P12  1.02527080  0.35531582
## 8   1807P12Ob   18    07     P12  1.02527080  0.35531582
## 1   1804P12Ob   18    04     P12 -0.84854566  0.45664004
## 20  1904P12Ob   19    04     P12 -1.49236325  0.61629606
## 35  2007P12Ob   20    07     P12  0.49561271  0.60914544
## 27  1907P22Ob   19    07     P22  0.80952364 -0.35360545
## 36  2007P22Ob   20    07     P22 -0.45130300 -1.19501852
## 2   1804P22Ob   18    04     P22 -0.79137166 -0.45872049
## 43  2009P22Ob   20    09     P22 -0.54992194  0.78083456
## 14  1809P22Ob   18    09     P22  0.23304934  0.16725370
## 37  2007P28Ob   20    07     P28  0.60199652 -0.48057934
## 50  2010P28Ob   20    10     P28 -0.60428744 -1.05703984
## 3   1804P28Ob   18    04     P28 -0.73388498  0.08992029
## 44  2009P28Ob   20    09     P28 -0.09091469  1.00569721
## 9   1807P28Ob   18    07     P28  1.02527080  0.35531582
## 45  2009P38Ob   20    09     P38  0.50662348 -0.74041767
## 16  1809P38Ob   18    09     P38  0.20564989 -0.63980523
## 22  1904P38Ob   19    04     P38 -0.49053664 -0.37742784
## 4   1804P38Ob   18    04     P38 -0.95301882  0.39258863
## 33  1909P38Ob   19    09     P38  0.56019960  1.00316286
## 38  2007P38Ob   20    07     P38  0.68039527  0.07712894
## 10 1807P402Ob   18    07    P402  0.34463063 -0.19932061
## 46 2009P402Ob   20    09    P402 -0.01616893 -0.23459010
## 5  1804P402Ob   18    04    P402 -0.51998445  0.33170597
## 39 2007P402Ob   20    07    P402  0.49157829  0.27504512
## 29 1907P402Ob   19    07    P402  0.68039527  0.07712894
## 30   1907P4Ob   19    07      P4  0.05653786  0.25512507
## 6    1804P4Ob   18    04      P4 -0.83124451 -0.09560046
## 47   2009P4Ob   20    09      P4 -0.68783155  0.63334617
## 40   2007P4Ob   20    07      P4  0.04169239  0.49716243
## 48   2009P8Ob   20    09      P8  0.45606155 -1.31707329
## 7    1804P8Ob   18    04      P8 -0.33120097 -0.61879064
## 34   1909P8Ob   19    09      P8 -0.21672482 -0.18032775
## 25   1904P8Ob   19    04      P8  0.80952364 -0.35360545

Species fitting

vf <- envfit(Fishcytb_NMDSmodel, Fishcytb_wide, perm = 999)

spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)

# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
##                        NMDS1      NMDS2           Species Pvalues R_squared
## Clupea pallasii   -0.3951285 -0.7922745   Clupea pallasii   0.001 0.4200226
## Engraulis mordax   0.7500000  0.3024734  Engraulis mordax   0.001 0.3504487
## Parophrys vetulus -0.7742137  0.3349948 Parophrys vetulus   0.001 0.3813349
Fishcytb_Species_plot <- ggplot(Fishcytb.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour =  station))+theme_bw()+theme(panel.grid.major = element_blank(),
                                                                                           panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
  geom_segment(data = spp.scrs,size=0.4,
               aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
               arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+ 
  geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
  annotate("text", label = "2D Stress: 0.147", x = 0.55, y = 0.67, size = 5, colour = "black")+
  geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
                  size = 3, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.16, "lines"), label.size = 0.05)
Fishcytb_Species_plot

save plot

setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "Fishcytb_NMDS_oblique.png", plot = Fishcytb_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)