The purpose of this code is to Run NMDS ordinations for each metabarcoding marker to determine which species are influential and may be interesting to include in mock community analysis. This file is for the oblique tows. There is another file for the vertical tows. I decided to remove stations that were not strictly WOAC stations. Replicates were also combined so that any species that was found in either of the replicates was counted. NMDS plots are made using presence-absence data due to concerns about amplification bias. For taxon-specific markers, I decided to subset the data to that specific taxon.
library(stringr)
library(vegan)
library(EcolUtils)
library(ggplot2)
library(ggrepel)
library(dplyr)
library(reshape2)
Import ASV count tables - I downloaded the tsv and csv files from google drive into the data directory
files_to_read <- list.files(path = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/data/",
pattern = "\\.tsv$", full.names = TRUE)
# Read each file and assign to a variable named after the file
for (file in files_to_read) {
# Read the file into a data frame
df <- read.table(file = file, sep = '\t', header = TRUE)
# Rename the column "x" to "ASV" if it exists
if ("x" %in% colnames(df)) {
colnames(df)[colnames(df) == "x"] <- "ASV"
}
# Create a variable name from the filename
# Remove the directory path and file extension, replace spaces with underscores
var_name <- gsub("\\.tsv$", "", basename(file))
var_name <- gsub(" ", "_", var_name)
# Assign the data frame to a variable with the created name
assign(var_name, df)
}
Import ASV to taxonomy tables
files_to_read <- list.files(path = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/data/",
pattern = "\\.txt$", full.names = TRUE)
# Read each file and assign to a variable named after the file
for (file in files_to_read) {
# Read the file into a data frame
df <- read.table(file = file, sep = '\t', header = TRUE)
# Create a variable name from the filename
# Remove the directory path and file extension, replace spaces with underscores
var_name <- gsub("\\.txt$", "", basename(file))
var_name <- gsub(" ", "_", var_name)
# Assign the data frame to a variable with the created name
assign(var_name, df)
}
add taxa ids to count data
M28S_counts <- merge(RV_28S_asvTaxonomyTable, marker_28S_ASVs_counts, by = "ASV", all = TRUE)
COI_counts <- merge(RV_COI_asvTaxonomyTable, COI_ASVs_counts, by = "ASV", all = TRUE)
Cop16S_counts <- merge(RV_Cop16S_asvTaxonomyTable, Cop16S_ASVs_counts, by = "ASV", all = TRUE)
Fishcytb_counts <- merge(RV_Fishcytb_asvTaxonomyTable, Fishcytb_ASVs_counts, by = "ASV", all = TRUE)
Machida18S_counts <- merge(RV_Machida18S_asvTaxonomyTable, Machida18S_ASVs_counts, by = "ASV", all = TRUE)
Mol16S_counts <- merge(RV_Mol16S_asvTaxonomyTable, Mol16S_ASVs_counts, by = "ASV", all = TRUE)
Remove non-zooplanton phyla
unique(COI_counts$Phylum)
## [1] "Arthropoda" "Bacillariophyta"
## [3] "Bryozoa" "Mollusca"
## [5] "Rhodophyta" NA
## [7] "Echinodermata" "Annelida"
## [9] "Cnidaria" "Platyhelminthes"
## [11] "Oomycota" "Chordata"
## [13] "Chaetognatha" "Nemertea"
## [15] "Ctenophora" "Ascomycota"
## [17] "Onychophora" "Cryptophyceae__p"
## [19] "Zoopagomycota" "Rotifera"
## [21] "Phaeophyceae__p" "Haptophyta"
## [23] "Basidiomycota" "Chlorophyta"
## [25] "Dinophyceae__p" "Phoronida"
## [27] "Discosea" "Porifera"
## [29] "Tubulinea" "Dictyochophyceae__p"
## [31] "Bacteroidota" "Blastocladiomycota"
## [33] "Pseudomonadota" "Chrysophyceae__p"
## [35] "Nematoda" "Malawimonadida__c__p"
## [37] "Prasinodermophyta" "Chrysoparadoxa__f__o__c__p"
## [39] "Apusomonadida__c__p" "Streptophyta"
## [41] "Pelagophyceae__p" "Eustigmatophyceae__p"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p')
# Remove those levels and drop them from the factor
COI_counts <- droplevels(COI_counts[!COI_counts$Phylum %in% levels_to_remove,])
remove non-species rows
COI_comp<- COI_counts[complete.cases(COI_counts), ]
convert to presence-absence
COI_comp_pa<-COI_comp %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
remove excess columns
COI_comp_pa <- COI_comp_pa[ -c(1:7) ]
wide to long format
COI_long<-melt(COI_comp_pa, id.vars=c("Species"))
add up multiple lines per station and convert back to presence-absence
COI_long_sum <- COI_long %>%
group_by(Species,variable) %>%
summarise(
value = sum(value))
COI_long_sum<-COI_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
split code string up to extract metadata
COI_long_sum$variable<-str_split_fixed(COI_long_sum$variable,"_",4)
remove samples of non-interest
unique(COI_long_sum$variable[,3])
## [1] "P12" "P22" "P28" "P38" "P402" "P4" "P8" "CA042" "P105"
## [10] "P123" "P132" "P136" "P381" "P7" ""
unique(COI_long_sum$variable[,2])
## [1] "1804" "1806" "1807" "1809" "1904" "1907"
## [7] "1909" "2007" "2009" "2010" "Control1" "Control2"
remove excess stations
levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
COI_long_sum <- droplevels(COI_long_sum[!COI_long_sum$variable[,3] %in% levels_to_remove,])
split up replicate columns
unique(COI_long_sum$variable[,4])
## [1] "VeA" "Ve" "ObB" "ObA" "Ob" "VeB"
COI_long_sum <- COI_long_sum %>%
mutate(
tow_type = substr(variable[,4], 1, 2),
replicate = substr(variable[,4], 3, nchar(variable[,4]))
)
split up year and month columns
COI_long_sum <- COI_long_sum %>%
mutate(
Year = substr(variable[,2], 1, 2),
Month = substr(variable[,2], 3, 4)
)
make station column
COI_long_sum$station<-COI_long_sum$variable[,3]
remove vertical samples
COI_long_sum<-subset(COI_long_sum,tow_type=="Ob")
merge replicates
colnames(COI_long_sum)
## [1] "Species" "variable" "value" "tow_type" "replicate" "Year"
## [7] "Month" "station"
COI_long_sum$sample<-paste(COI_long_sum$Year,COI_long_sum$Month,COI_long_sum$station,COI_long_sum$tow_type,sep="")
COI_long_sum <- COI_long_sum %>%
group_by(Year,Month,station,sample,Species) %>%
summarise(
value = sum(value))
#convert back to presence-absence
COI_long_sum<-COI_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
back to wide format
COI_wide<-dcast(COI_long_sum, sample+Year+Month+station ~ Species )
remove any columns with all zeroes
target_cols <- 6:ncol(COI_wide)
non_zero_cols <- target_cols[colSums(COI_wide[, target_cols]) != 0]
COI_wide <- COI_wide[, c(1:5, non_zero_cols)]
remove environmental data
COI_wide_simple<-COI_wide[ , 5:231]
Make environmental columns categorical
# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")
# Convert the specified columns to factors
COI_wide <- COI_wide %>%
mutate_at(vars(one_of(cols_to_factor)), as.factor)
dist<-vegdist(COI_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=COI_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist ~ station * Month, data = COI_wide, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## station 6 3.3012 0.35816 5.3775 0.001 ***
## Month 3 1.1019 0.11955 3.5899 0.001 ***
## station:Month 13 1.6423 0.17818 1.2348 0.044 *
## Residual 31 3.1717 0.34411
## Total 53 9.2171 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
station comparisons
adonis.pair(vegdist(COI_wide_simple),COI_wide$station)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 P12 <-> P22 0.8441661 0.8441661 6.818192 0.3440370 0.001998002
## 2 P12 <-> P28 0.3392037 0.3392037 2.732722 0.1633161 0.000999001
## 3 P12 <-> P38 0.4858547 0.4858547 3.715211 0.2364086 0.002997003
## 4 P12 <-> P4 0.6119630 0.6119630 4.910171 0.2741555 0.000999001
## 5 P12 <-> P402 0.6108084 0.6108084 4.372799 0.2670770 0.003996004
## 6 P12 <-> P8 0.7420610 0.7420610 5.163161 0.2842656 0.000999001
## 7 P22 <-> P28 0.6673237 0.6673237 5.954230 0.2841541 0.000999001
## 8 P22 <-> P38 0.5308458 0.5308458 4.562179 0.2597730 0.000999001
## 9 P22 <-> P4 0.7407450 0.7407450 6.632524 0.3214596 0.000999001
## 10 P22 <-> P402 0.7811113 0.7811113 6.269866 0.3253716 0.000999001
## 11 P22 <-> P8 0.2256816 0.2256816 1.743919 0.1107678 0.051948052
## 12 P28 <-> P38 0.3680673 0.3680673 3.140342 0.1832135 0.000999001
## 13 P28 <-> P4 0.7315381 0.7315381 6.486016 0.3018715 0.000999001
## 14 P28 <-> P402 0.7381860 0.7381860 5.912936 0.2969394 0.000999001
## 15 P28 <-> P8 0.5058739 0.5058739 3.911434 0.2068290 0.001998002
## 16 P38 <-> P4 0.3524163 0.3524163 3.007509 0.1878812 0.000999001
## 17 P38 <-> P402 0.3856336 0.3856336 2.930131 0.1962562 0.005994006
## 18 P38 <-> P8 0.2911071 0.2911071 2.136263 0.1411354 0.009990010
## 19 P4 <-> P402 0.4367466 0.4367466 3.482754 0.2112968 0.000999001
## 20 P4 <-> P8 0.5927355 0.5927355 4.553449 0.2454233 0.001998002
## 21 P402 <-> P8 0.5467140 0.5467140 3.783666 0.2254374 0.002997003
## P.value.corrected
## 1 0.002797203
## 2 0.001748252
## 3 0.003702180
## 4 0.001748252
## 5 0.004662005
## 6 0.001748252
## 7 0.001748252
## 8 0.001748252
## 9 0.001748252
## 10 0.001748252
## 11 0.051948052
## 12 0.001748252
## 13 0.001748252
## 14 0.001748252
## 15 0.002797203
## 16 0.001748252
## 17 0.006624954
## 18 0.010489510
## 19 0.001748252
## 20 0.002797203
## 21 0.003702180
month comparisons
adonis.pair(vegdist(COI_wide_simple),COI_wide$Month)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 04 <-> 07 0.5235489 0.5235489 3.5498196 0.10580741 0.000999001
## 2 04 <-> 09 0.7506380 0.7506380 4.3752162 0.12368027 0.000999001
## 3 04 <-> 10 0.2498979 0.2498979 1.5026453 0.10361181 0.104895105
## 4 07 <-> 09 0.2884489 0.2884489 1.8093841 0.04662233 0.047952048
## 5 07 <-> 10 0.1392565 0.1392565 0.9649775 0.04833351 0.494505495
## 6 09 <-> 10 0.1370137 0.1370137 0.7536787 0.03631543 0.680319680
## P.value.corrected
## 1 0.002997003
## 2 0.002997003
## 3 0.157342657
## 4 0.095904096
## 5 0.593406593
## 6 0.680319680
COI_NMDSmodel <- metaMDS(COI_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.226526
## Run 1 stress 0.2447922
## Run 2 stress 0.252478
## Run 3 stress 0.2346737
## Run 4 stress 0.2285766
## Run 5 stress 0.25453
## Run 6 stress 0.2276202
## Run 7 stress 0.2384844
## Run 8 stress 0.2350877
## Run 9 stress 0.226526
## ... Procrustes: rmse 2.106009e-05 max resid 0.0001057249
## ... Similar to previous best
## Run 10 stress 0.2592979
## Run 11 stress 0.2296912
## Run 12 stress 0.2504095
## Run 13 stress 0.2517093
## Run 14 stress 0.2485717
## Run 15 stress 0.2713318
## Run 16 stress 0.2574965
## Run 17 stress 0.2265261
## ... Procrustes: rmse 6.446179e-05 max resid 0.0003724996
## ... Similar to previous best
## Run 18 stress 0.2327687
## Run 19 stress 0.2704882
## Run 20 stress 0.2276842
## *** Best solution repeated 2 times
COI_NMDSmodel
##
## Call:
## metaMDS(comm = COI_wide_simple, distance = "bray", trymax = 999)
##
## global Multidimensional Scaling using monoMDS
##
## Data: COI_wide_simple
## Distance: bray
##
## Dimensions: 2
## Stress: 0.226526
## Stress type 1, weak ties
## Best solution was repeated 2 times in 20 tries
## The best solution was from try 0 (metric scaling or null solution)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'COI_wide_simple'
get datascores
COI.data.scores<-scores(COI_NMDSmodel,display="sites")
COI.data.scores<- as.data.frame(COI.data.scores)
add environmental variables back in
# Extract the first 5 columns from df1
first_5_columns <- COI_wide[, 1:4]
# Combine the first 5 columns from df1 with df2
COI.data.scores <- cbind(first_5_columns, COI.data.scores)
plot datascores
xx <- ggplot(COI.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
xx
extract hulls
unique(COI.data.scores$station)
## [1] P22 P28 P38 P402 P4 P8 P12
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- COI.data.scores[COI.data.scores$station == "P12", ][chull(COI.data.scores[COI.data.scores$station == "P12", c("NMDS1", "NMDS2")]), ]
P22 <- COI.data.scores[COI.data.scores$station == "P22", ][chull(COI.data.scores[COI.data.scores$station == "P22", c("NMDS1", "NMDS2")]), ]
P28 <- COI.data.scores[COI.data.scores$station == "P28", ][chull(COI.data.scores[COI.data.scores$station == "P28", c("NMDS1", "NMDS2")]), ]
P38 <- COI.data.scores[COI.data.scores$station == "P38", ][chull(COI.data.scores[COI.data.scores$station == "P38", c("NMDS1", "NMDS2")]), ]
P4 <- COI.data.scores[COI.data.scores$station == "P4", ][chull(COI.data.scores[COI.data.scores$station == "P4", c("NMDS1", "NMDS2")]), ]
P402 <- COI.data.scores[COI.data.scores$station == "P402", ][chull(COI.data.scores[COI.data.scores$station == "P402", c("NMDS1", "NMDS2")]), ]
P8 <- COI.data.scores[COI.data.scores$station == "P8", ][chull(COI.data.scores[COI.data.scores$station == "P8", c("NMDS1", "NMDS2")]), ]
get hull data
hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8) #combine grp.a and grp.b
hull.data
## sample Year Month station NMDS1 NMDS2
## 12 1809P12Ob 18 09 P12 0.713489965 -0.342848681
## 33 1909P12Ob 19 09 P12 0.455358562 -0.311146940
## 19 1904P12Ob 19 04 P12 0.170347293 0.133811623
## 39 2007P12Ob 20 07 P12 0.457820765 0.104799253
## 26 1907P12Ob 19 07 P12 0.598141034 -0.004805131
## 1 1804P22Ob 18 04 P22 -0.387878578 -0.532164753
## 20 1904P22Ob 19 04 P22 -0.499709794 -0.513621197
## 53 2010P22Ob 20 10 P22 -0.584301914 -0.126212631
## 47 2009P22Ob 20 09 P22 -0.443064235 -0.119353116
## 40 2007P22Ob 20 07 P22 -0.296111165 -0.120300088
## 13 1809P22Ob 18 09 P22 -0.120729672 -0.153832973
## 27 1907P22Ob 19 07 P22 -0.127814701 -0.196333318
## 14 1809P28Ob 18 09 P28 0.402913089 -0.505697376
## 21 1904P28Ob 19 04 P28 -0.102549744 -0.350172019
## 41 2007P28Ob 20 07 P28 -0.057103259 -0.204335220
## 8 1807P28Ob 18 07 P28 0.171593110 -0.098777628
## 54 2010P28Ob 20 10 P28 0.367005687 -0.250153935
## 36 1909P38Ob 19 09 P38 0.188327608 -0.070878325
## 22 1904P38Ob 19 04 P38 0.145904772 -0.160418774
## 42 2007P38Ob 20 07 P38 -0.229100826 0.065102238
## 49 2009P38Ob 20 09 P38 -0.309460006 0.261420853
## 15 1809P38Ob 18 09 P38 0.055886735 0.417824116
## 16 1809P402Ob 18 09 P402 0.336710849 0.079483228
## 30 1907P402Ob 19 07 P402 -0.066377474 0.073864091
## 9 1807P402Ob 18 07 P402 -0.356591639 0.307195379
## 23 1904P402Ob 19 04 P402 -0.378868662 0.551504499
## 50 2009P402Ob 20 09 P402 -0.298333464 0.737893937
## 4 1804P402Ob 18 04 P402 -0.012364394 0.637948606
## 17 1809P4Ob 18 09 P4 0.560735958 0.290274064
## 44 2007P4Ob 20 07 P4 0.004194868 0.180514980
## 31 1907P4Ob 19 07 P4 -0.049093821 0.174387598
## 24 1904P4Ob 19 04 P4 -0.100773759 0.357828001
## 37 1909P4Ob 19 09 P4 0.153803605 0.454601739
## 32 1907P8Ob 19 07 P8 -0.020162991 -0.159863266
## 6 1804P8Ob 18 04 P8 -0.464154873 -0.367341837
## 52 2009P8Ob 20 09 P8 -0.555890115 0.012938163
## 18 1809P8Ob 18 09 P8 0.537098993 0.418450707
vf <- envfit(COI_NMDSmodel, COI_wide, perm = 999)
spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)
# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
## NMDS1 NMDS2 Species
## Acartia longiremis -0.4822590 -0.4714815 Acartia longiremis
## Balanus glandula -0.5855395 0.3388121 Balanus glandula
## Balanus rostratus -0.3268131 -0.5340929 Balanus rostratus
## Crangon crangon -0.5018258 0.4302424 Crangon crangon
## Discorsopagurus schmitti -0.4134237 -0.4797192 Discorsopagurus schmitti
## Eucalanus bungii -0.3503248 -0.5593713 Eucalanus bungii
## Fabia subquadrata -0.6405165 -0.5734933 Fabia subquadrata
## Glebocarcinus oregonensis -0.4339432 -0.5031502 Glebocarcinus oregonensis
## Heptacarpus tridens -0.4134237 -0.4797192 Heptacarpus tridens
## Lepidasthenia berkeleyae -0.2436771 0.7500000 Lepidasthenia berkeleyae
## Metacarcinus gracilis -0.5549191 -0.3089166 Metacarcinus gracilis
## Metridia pacifica 0.5004458 -0.3889989 Metridia pacifica
## Pagurus beringanus -0.5880015 -0.2561552 Pagurus beringanus
## Paraeuchaeta elongata 0.6999164 -0.0342669 Paraeuchaeta elongata
## Pasiphaea pacifica 0.4645366 -0.5290621 Pasiphaea pacifica
## Pseudocalanus moultoni -0.6108149 -0.5860453 Pseudocalanus moultoni
## Rathkea octopunctata -0.2036083 0.6974968 Rathkea octopunctata
## Sarsia bella -0.4134237 -0.4797192 Sarsia bella
## Scyra acutifrons -0.4922006 -0.5224270 Scyra acutifrons
## Stomotoca atra -0.5394189 -0.4749028 Stomotoca atra
## Pvalues R_squared
## Acartia longiremis 0.001 0.3511338
## Balanus glandula 0.001 0.3532810
## Balanus rostratus 0.001 0.3026506
## Crangon crangon 0.001 0.3372921
## Discorsopagurus schmitti 0.001 0.3095886
## Eucalanus bungii 0.001 0.3362778
## Fabia subquadrata 0.001 0.5705882
## Glebocarcinus oregonensis 0.001 0.3407883
## Heptacarpus tridens 0.001 0.3095886
## Lepidasthenia berkeleyae 0.001 0.4800564
## Metacarcinus gracilis 0.001 0.3113756
## Metridia pacifica 0.001 0.3101416
## Pagurus beringanus 0.001 0.3175485
## Paraeuchaeta elongata 0.001 0.3790695
## Pasiphaea pacifica 0.001 0.3826542
## Pseudocalanus moultoni 0.001 0.5531329
## Rathkea octopunctata 0.001 0.4075549
## Sarsia bella 0.001 0.3095886
## Scyra acutifrons 0.001 0.3976998
## Stomotoca atra 0.001 0.3987141
COI_Species_plot <- ggplot(COI.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
geom_segment(data = spp.scrs,size=0.4,
aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+
geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
annotate("text", label = "2D Stress: 0.227", x = 0.55, y = 0.8, size = 5, colour = "black")+
geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
size = 2, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.01, "lines"), label.size = 0.02)
COI_Species_plot
save plot
setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "COI_NMDS_oblique.png", plot = COI_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)
Remove non-zooplanton phyla
unique(Cop16S_counts$Phylum)
## [1] "Arthropoda" NA "Mollusca" "Phoronida"
## [5] "Annelida" "Nemertea" "Cnidaria" "Chaetognatha"
## [9] "Echinodermata" "Nematoda" "Bryozoa" "Acanthocephala"
## [13] "Platyhelminthes"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p')
# Remove those levels and drop them from the factor
Cop16S_counts <- droplevels(Cop16S_counts[!Cop16S_counts$Phylum %in% levels_to_remove,])
subset to copepods only
Cop16S_counts<-subset(Cop16S_counts,Class=="Hexanauplia")
remove non-species rows
Cop16S_comp<- Cop16S_counts[complete.cases(Cop16S_counts), ]
convert to presence-absence
Cop16S_comp_pa<-Cop16S_comp %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
remove excess columns
Cop16S_comp_pa <- Cop16S_comp_pa[ -c(1:7) ]
wide to long format
Cop16S_long<-melt(Cop16S_comp_pa, id.vars=c("Species"))
add up multiple lines per station and convert back to presence-absence
Cop16S_long_sum <- Cop16S_long %>%
group_by(Species,variable) %>%
summarise(
value = sum(value))
Cop16S_long_sum<-Cop16S_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
split code string up to extract metadata
Cop16S_long_sum$variable<-str_split_fixed(Cop16S_long_sum$variable,"_",4)
remove samples of non-interest
unique(Cop16S_long_sum$variable[,3])
## [1] "P12" "P22" "P28" "P38" "P402" "P4" "P8" "CA042" "P105"
## [10] "P123" "P132" "P136" "P381" "P7" ""
unique(Cop16S_long_sum$variable[,2])
## [1] "1804" "1806" "1807" "1809" "1904" "1907"
## [7] "1909" "2007" "2009" "2010" "Control1" "Control2"
#remove excess stations
levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
Cop16S_long_sum <- droplevels(Cop16S_long_sum[!Cop16S_long_sum$variable[,3] %in% levels_to_remove,])
split up replicate columns
unique(Cop16S_long_sum$variable[,4])
## [1] "Ob" "Ve" "ObB" "VeA" "ObA" "VeB"
Cop16S_long_sum <- Cop16S_long_sum %>%
mutate(
tow_type = substr(variable[,4], 1, 2),
replicate = substr(variable[,4], 3, nchar(variable[,4]))
)
split up year and month columns
Cop16S_long_sum <- Cop16S_long_sum %>%
mutate(
Year = substr(variable[,2], 1, 2),
Month = substr(variable[,2], 3, 4)
)
make station column
Cop16S_long_sum$station<-Cop16S_long_sum$variable[,3]
remove vertical samples
Cop16S_long_sum<-subset(Cop16S_long_sum,tow_type=="Ob")
merge replicates (and oblique and vertical tows)
colnames(Cop16S_long_sum)
## [1] "Species" "variable" "value" "tow_type" "replicate" "Year"
## [7] "Month" "station"
Cop16S_long_sum$sample<-paste(Cop16S_long_sum$Year,Cop16S_long_sum$Month,Cop16S_long_sum$station,Cop16S_long_sum$tow_type,sep="")
Cop16S_long_sum <- Cop16S_long_sum %>%
group_by(Year,Month,station,sample,Species) %>%
summarise(
value = sum(value))
#convert back to presence-absence
Cop16S_long_sum<-Cop16S_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
back to wide format
Cop16S_wide<-dcast(Cop16S_long_sum, sample+Year+Month+station ~ Species )
remove any columns with all zeroes
target_cols <- 6:ncol(Cop16S_wide)
non_zero_cols <- target_cols[colSums(Cop16S_wide[, target_cols]) != 0]
Cop16S_wide <- Cop16S_wide[, c(1:5, non_zero_cols)]
remove environmental data
Cop16S_wide_simple<-Cop16S_wide[ , 5:17]
Make environmental columns categorical
# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")
# Convert the specified columns to factors
Cop16S_wide <- Cop16S_wide %>%
mutate_at(vars(one_of(cols_to_factor)), as.factor)
dist<-vegdist(Cop16S_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=Cop16S_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist ~ station * Month, data = Cop16S_wide, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## station 6 0.82213 0.32630 3.6804 0.001 ***
## Month 3 0.19740 0.07835 1.7673 0.099 .
## station:Month 13 0.60649 0.24072 1.2531 0.229
## Residual 24 0.89352 0.35464
## Total 46 2.51954 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
station comparisons
adonis.pair(vegdist(Cop16S_wide_simple),Cop16S_wide$station)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 P12 <-> P22 0.459598705 0.459598705 13.07684176 0.501473372 0.000999001
## 2 P12 <-> P28 0.058984168 0.058984168 1.21783820 0.085655652 0.336663337
## 3 P12 <-> P38 0.036826815 0.036826815 1.08530367 0.082940656 0.383616384
## 4 P12 <-> P4 0.103127635 0.103127635 2.45381936 0.197033479 0.086913087
## 5 P12 <-> P402 0.049619527 0.049619527 1.18099498 0.096953901 0.347652348
## 6 P12 <-> P8 0.097116997 0.097116997 2.10779880 0.160804940 0.154845155
## 7 P22 <-> P28 0.239546946 0.239546946 5.47711809 0.281207829 0.016983017
## 8 P22 <-> P38 0.323144638 0.323144638 10.77549533 0.453218542 0.001998002
## 9 P22 <-> P4 0.170734702 0.170734702 4.66092680 0.297615004 0.003996004
## 10 P22 <-> P402 0.411842178 0.411842178 11.11002693 0.480744872 0.000999001
## 11 P22 <-> P8 0.185915832 0.185915832 4.55775099 0.275263892 0.030969031
## 12 P28 <-> P38 0.008981190 0.008981190 0.20753116 0.015713093 0.835164835
## 13 P28 <-> P4 0.067694554 0.067694554 1.29349839 0.105218088 0.322677323
## 14 P28 <-> P402 0.093247579 0.093247579 1.81189199 0.131183475 0.171828172
## 15 P28 <-> P8 -0.002244164 -0.002244164 -0.04066551 -0.003400316 0.928071928
## 16 P38 <-> P4 0.081850068 0.081850068 2.31718323 0.188126066 0.121878122
## 17 P38 <-> P402 0.068078615 0.068078615 1.89527407 0.146974315 0.246753247
## 18 P38 <-> P8 0.022882636 0.022882636 0.57234843 0.049458278 0.492507493
## 19 P4 <-> P402 0.129215853 0.129215853 2.84889115 0.240435254 0.068931069
## 20 P4 <-> P8 0.053734225 0.053734225 1.06787494 0.106067561 0.407592408
## 21 P402 <-> P8 0.122224665 0.122224665 2.47037891 0.198099747 0.095904096
## P.value.corrected
## 1 0.01048951
## 2 0.45629371
## 3 0.47387906
## 4 0.22377622
## 5 0.45629371
## 6 0.29561348
## 7 0.07132867
## 8 0.01398601
## 9 0.02097902
## 10 0.01048951
## 11 0.10839161
## 12 0.87692308
## 13 0.45629371
## 14 0.30069930
## 15 0.92807193
## 16 0.25594406
## 17 0.39860140
## 18 0.54435039
## 19 0.20679321
## 20 0.47552448
## 21 0.22377622
month comparisons
adonis.pair(vegdist(Cop16S_wide_simple),Cop16S_wide$Month)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 04 <-> 07 0.0396521431 0.0396521431 0.8590613962 3.455727e-02 0.4235764
## 2 04 <-> 09 0.0348810420 0.0348810420 0.5427633964 1.970621e-02 0.6293706
## 3 04 <-> 10 0.0669444714 0.0669444714 1.0879229397 9.811783e-02 0.3326673
## 4 07 <-> 09 0.0000125265 0.0000125265 0.0002301713 6.974840e-06 0.9320679
## 5 07 <-> 10 0.0586162538 0.0586162538 1.3871373305 7.977951e-02 0.2717283
## 6 09 <-> 10 0.0565087073 0.0565087073 0.8236763897 4.155013e-02 0.4895105
## P.value.corrected
## 1 0.7342657
## 2 0.7552448
## 3 0.7342657
## 4 0.9320679
## 5 0.7342657
## 6 0.7342657
Cop16S_NMDSmodel <- metaMDS(Cop16S_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.09415055
## Run 1 stress 0.1209909
## Run 2 stress 0.09415044
## ... New best solution
## ... Procrustes: rmse 7.711639e-05 max resid 0.000152219
## ... Similar to previous best
## Run 3 stress 0.09681938
## Run 4 stress 0.1219042
## Run 5 stress 0.1080135
## Run 6 stress 0.1082108
## Run 7 stress 0.09415049
## ... Procrustes: rmse 0.0001767031 max resid 0.000344989
## ... Similar to previous best
## Run 8 stress 0.1079082
## Run 9 stress 0.1370283
## Run 10 stress 0.1304382
## Run 11 stress 0.09415053
## ... Procrustes: rmse 6.856108e-05 max resid 0.0001420486
## ... Similar to previous best
## Run 12 stress 0.1199078
## Run 13 stress 0.09429338
## ... Procrustes: rmse 0.003202509 max resid 0.01605773
## Run 14 stress 0.1360864
## Run 15 stress 0.09377016
## ... New best solution
## ... Procrustes: rmse 0.02075863 max resid 0.1284749
## Run 16 stress 0.138071
## Run 17 stress 0.1219042
## Run 18 stress 0.0939106
## ... Procrustes: rmse 0.003483345 max resid 0.01834119
## Run 19 stress 0.1082108
## Run 20 stress 0.1079082
## Run 21 stress 0.09681938
## Run 22 stress 0.1081069
## Run 23 stress 0.09415048
## ... Procrustes: rmse 0.02073858 max resid 0.1284859
## Run 24 stress 0.0941504
## ... Procrustes: rmse 0.02077492 max resid 0.128555
## Run 25 stress 0.1157494
## Run 26 stress 0.09677197
## Run 27 stress 0.09429336
## Run 28 stress 0.09677196
## Run 29 stress 0.09415039
## ... Procrustes: rmse 0.02077787 max resid 0.1285601
## Run 30 stress 0.1082108
## Run 31 stress 0.0937701
## ... New best solution
## ... Procrustes: rmse 9.473911e-05 max resid 0.0002575655
## ... Similar to previous best
## *** Best solution repeated 1 times
Cop16S_NMDSmodel
##
## Call:
## metaMDS(comm = Cop16S_wide_simple, distance = "bray", trymax = 999)
##
## global Multidimensional Scaling using monoMDS
##
## Data: Cop16S_wide_simple
## Distance: bray
##
## Dimensions: 2
## Stress: 0.0937701
## Stress type 1, weak ties
## Best solution was repeated 1 time in 31 tries
## The best solution was from try 31 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'Cop16S_wide_simple'
get datascores
Cop16S.data.scores<-scores(Cop16S_NMDSmodel,display="sites")
Cop16S.data.scores<- as.data.frame(Cop16S.data.scores)
add environmental variables back in
# Extract the first 5 columns from df1
first_5_columns <- Cop16S_wide[, 1:4]
# Combine the first 5 columns from df1 with df2
Cop16S.data.scores <- cbind(first_5_columns, Cop16S.data.scores)
plot datascores
xx <- ggplot(Cop16S.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
xx
extract hulls
unique(Cop16S.data.scores$station)
## [1] P12 P22 P28 P38 P402 P4 P8
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- Cop16S.data.scores[Cop16S.data.scores$station == "P12", ][chull(Cop16S.data.scores[Cop16S.data.scores$station == "P12", c("NMDS1", "NMDS2")]), ]
P22 <- Cop16S.data.scores[Cop16S.data.scores$station == "P22", ][chull(Cop16S.data.scores[Cop16S.data.scores$station == "P22", c("NMDS1", "NMDS2")]), ]
P28 <- Cop16S.data.scores[Cop16S.data.scores$station == "P28", ][chull(Cop16S.data.scores[Cop16S.data.scores$station == "P28", c("NMDS1", "NMDS2")]), ]
P38 <- Cop16S.data.scores[Cop16S.data.scores$station == "P38", ][chull(Cop16S.data.scores[Cop16S.data.scores$station == "P38", c("NMDS1", "NMDS2")]), ]
P4 <- Cop16S.data.scores[Cop16S.data.scores$station == "P4", ][chull(Cop16S.data.scores[Cop16S.data.scores$station == "P4", c("NMDS1", "NMDS2")]), ]
P402 <- Cop16S.data.scores[Cop16S.data.scores$station == "P402", ][chull(Cop16S.data.scores[Cop16S.data.scores$station == "P402", c("NMDS1", "NMDS2")]), ]
P8 <- Cop16S.data.scores[Cop16S.data.scores$station == "P8", ][chull(Cop16S.data.scores[Cop16S.data.scores$station == "P8", c("NMDS1", "NMDS2")]), ]
get hull data
hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8) #combine grp.a and grp.b
hull.data
## sample Year Month station NMDS1 NMDS2
## 11 1809P12Ob 18 09 P12 0.09483426 -0.4055790381
## 1 1804P12Ob 18 04 P12 -0.58838114 0.0001825864
## 8 1807P12Ob 18 07 P12 -0.58838125 0.0001850032
## 39 2009P12Ob 20 09 P12 -0.24745734 0.1907048364
## 27 1909P12Ob 19 09 P12 -0.02759480 -0.0330026043
## 2 1804P22Ob 18 04 P22 0.67566518 -0.1702912021
## 22 1907P22Ob 19 07 P22 0.46873051 -0.1283340394
## 18 1904P22Ob 19 04 P22 0.46873051 -0.1283340394
## 46 2010P22Ob 20 10 P22 -0.01286390 0.4407132778
## 12 1809P22Ob 18 09 P22 0.54822128 0.2182857927
## 28 1909P22Ob 19 09 P22 0.56936244 0.1569100030
## 34 2007P28Ob 20 07 P28 0.46738037 -0.2617466878
## 13 1809P28Ob 18 09 P28 -0.58838111 0.0001819462
## 47 2010P28Ob 20 10 P28 -0.58838119 0.0001837652
## 29 1909P28Ob 19 09 P28 0.01305402 0.2557123435
## 9 1807P28Ob 18 07 P28 0.17889316 0.2350923578
## 3 1804P28Ob 18 04 P28 0.42744692 0.0867423072
## 19 1904P38Ob 19 04 P38 -0.02759480 -0.0330026043
## 42 2009P38Ob 20 09 P38 -0.20214800 -0.0299290709
## 14 1809P38Ob 18 09 P38 -0.58838113 0.0001824058
## 4 1804P38Ob 18 04 P38 0.01305402 0.2557123435
## 30 1909P38Ob 19 09 P38 0.34162097 0.2308266212
## 43 2009P402Ob 20 09 P402 0.25438110 -0.8030822135
## 15 1809P402Ob 18 09 P402 -0.58838113 0.0001823084
## 5 1804P402Ob 18 04 P402 -0.20214800 -0.0299290709
## 25 1907P402Ob 19 07 P402 -0.20214800 -0.0299290709
## 16 1809P4Ob 18 09 P4 0.35269132 -0.2965862860
## 6 1804P4Ob 18 04 P4 -0.58838125 0.0001850558
## 37 2007P4Ob 20 07 P4 0.17889316 0.2350923578
## 17 1809P8Ob 18 09 P8 -0.11136462 -0.2712000093
## 45 2009P8Ob 20 09 P8 -0.58838125 0.0001850491
## 31 1909P8Ob 19 09 P8 0.01305402 0.2557123435
## 26 1907P8Ob 19 07 P8 0.34162097 0.2308266212
## 7 1804P8Ob 18 04 P8 0.48626850 0.0780765961
vf <- envfit(Cop16S_NMDSmodel, Cop16S_wide, perm = 999)
spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)
# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
## NMDS1 NMDS2 Species Pvalues
## Acartia longiremis 0.31076910 0.7500000000 Acartia longiremis 0.001
## Calanus glacialis 0.57387237 0.0102990728 Calanus glacialis 0.001
## Calanus marshallae 0.68216569 -0.0006285338 Calanus marshallae 0.001
## Eucalanus bungii 0.61255339 -0.4684059248 Eucalanus bungii 0.001
## Metridia lucens 0.71595441 0.1008227318 Metridia lucens 0.001
## Metridia pacifica -0.05647834 0.5267238704 Metridia pacifica 0.015
## R_squared
## Acartia longiremis 0.7388724
## Calanus glacialis 0.3693206
## Calanus marshallae 0.5216908
## Eucalanus bungii 0.6666176
## Metridia lucens 0.5860464
## Metridia pacifica 0.3146037
Cop16S_Species_plot <- ggplot(Cop16S.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
geom_segment(data = spp.scrs,size=0.4,
aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+
geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
annotate("text", label = "2D Stress: 0.92", x = 0.55, y = 0.8, size = 5, colour = "black")+
geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
size = 3, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.16, "lines"), label.size = 0.05)
Cop16S_Species_plot
save plot
setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "Cop16S_NMDS_oblique.png", plot = Cop16S_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)
Remove non-zooplanton phyla
unique(Mol16S_counts$Phylum)
## [1] "Arthropoda" "Nemertea" "Mollusca"
## [4] "Chordata" "Annelida" "Cnidaria"
## [7] "Echinodermata" NA "Rotifera"
## [10] "Chlorophyta" "Bacillariophyta" "Bryozoa"
## [13] "Gastrotricha" "Phoronida" "Entoprocta"
## [16] "Hemichordata" "Bacteroidota" "Platyhelminthes"
## [19] "Choanoflagellata__p"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p')
# Remove those levels and drop them from the factor
Mol16S_counts <- droplevels(Mol16S_counts[!Mol16S_counts$Phylum %in% levels_to_remove,])
subset to copepods only
Mol16S_counts<-subset(Mol16S_counts,Phylum=="Mollusca")
remove non-species rows
Mol16S_comp<- Mol16S_counts[complete.cases(Mol16S_counts), ]
convert to presence-absence
Mol16S_comp_pa<-Mol16S_comp %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
remove excess columns
Mol16S_comp_pa <- Mol16S_comp_pa[ -c(1:7) ]
wide to long format
Mol16S_long<-melt(Mol16S_comp_pa, id.vars=c("Species"))
add up multiple lines per station and convert back to presence-absence
Mol16S_long_sum <- Mol16S_long %>%
group_by(Species,variable) %>%
summarise(
value = sum(value))
Mol16S_long_sum<-Mol16S_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
split code string up to extract metadata
Mol16S_long_sum$variable<-str_split_fixed(Mol16S_long_sum$variable,"_",4)
remove samples of non-interest
unique(Mol16S_long_sum$variable[,3])
## [1] "P12" "P22" "P28" "P38" "P402" "P4" "P8" "CA042" "P105"
## [10] "P123" "P132" "P136" "P381" "P7" ""
unique(Mol16S_long_sum$variable[,2])
## [1] "1804" "1806" "1807" "1809" "1904" "1907"
## [7] "1909" "2007" "2009" "2010" "Control2"
#remove excess stations
levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
Mol16S_long_sum <- droplevels(Mol16S_long_sum[!Mol16S_long_sum$variable[,3] %in% levels_to_remove,])
split up replicate columns
unique(Mol16S_long_sum$variable[,4])
## [1] "Ob" "VeA" "Ve" "ObB" "ObA" "VeB"
Mol16S_long_sum <- Mol16S_long_sum %>%
mutate(
tow_type = substr(variable[,4], 1, 2),
replicate = substr(variable[,4], 3, nchar(variable[,4]))
)
split up year and month columns
Mol16S_long_sum <- Mol16S_long_sum %>%
mutate(
Year = substr(variable[,2], 1, 2),
Month = substr(variable[,2], 3, 4)
)
make station column
Mol16S_long_sum$station<-Mol16S_long_sum$variable[,3]
remove vertical samples
Mol16S_long_sum<-subset(Mol16S_long_sum,tow_type=="Ob")
merge replicates
colnames(Mol16S_long_sum)
## [1] "Species" "variable" "value" "tow_type" "replicate" "Year"
## [7] "Month" "station"
Mol16S_long_sum$sample<-paste(Mol16S_long_sum$Year,Mol16S_long_sum$Month,Mol16S_long_sum$station,Mol16S_long_sum$tow_type,sep="")
Mol16S_long_sum <- Mol16S_long_sum %>%
group_by(Year,Month,station,sample,Species) %>%
summarise(
value = sum(value))
#convert back to presence-absence
Mol16S_long_sum<-Mol16S_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
back to wide format
Mol16S_wide<-dcast(Mol16S_long_sum, sample+Year+Month+station ~ Species )
remove any columns with all zeroes
target_cols <- 6:ncol(Mol16S_wide)
non_zero_cols <- target_cols[colSums(Mol16S_wide[, target_cols]) != 0]
Mol16S_wide <- Mol16S_wide[, c(1:5, non_zero_cols)]
remove rows with all zeroes
row_sums <- rowSums(Mol16S_wide[, 5:39])
# Create a logical vector to identify rows where the sum is not zero
non_zero_rows <- row_sums != 0
# Subset the data frame to keep only rows with a non-zero sum
Mol16S_wide <- Mol16S_wide[non_zero_rows, ]
remove environmental data
Mol16S_wide_simple<-Mol16S_wide[ , 5:39]
Make environmental columns categorical
# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")
# Convert the specified columns to factors
Mol16S_wide <- Mol16S_wide %>%
mutate_at(vars(one_of(cols_to_factor)), as.factor)
dist<-vegdist(Mol16S_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=Mol16S_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist ~ station * Month, data = Mol16S_wide, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## station 6 3.6598 0.24908 2.8261 0.001 ***
## Month 3 2.1130 0.14381 3.2634 0.001 ***
## station:Month 12 2.6613 0.18112 1.0275 0.427
## Residual 29 6.2591 0.42599
## Total 50 14.6932 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
station comparisons
adonis.pair(vegdist(Mol16S_wide_simple),Mol16S_wide$station)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 P12 <-> P22 0.8989843 0.8989843 2.922938 0.18356774 0.030969031
## 2 P12 <-> P28 1.2751264 1.2751264 4.830349 0.27090603 0.006993007
## 3 P12 <-> P38 0.8316399 0.8316399 3.269490 0.20095838 0.020979021
## 4 P12 <-> P4 0.5599412 0.5599412 1.517965 0.10455771 0.152847153
## 5 P12 <-> P402 0.5977402 0.5977402 2.367952 0.15408379 0.048951049
## 6 P12 <-> P8 0.9980493 0.9980493 3.386856 0.19479402 0.005994006
## 7 P22 <-> P28 0.4178084 0.4178084 1.879406 0.13540968 0.158841159
## 8 P22 <-> P38 0.2467244 0.2467244 1.164406 0.08845106 0.337662338
## 9 P22 <-> P4 0.4297562 0.4297562 1.279250 0.09633451 0.291708292
## 10 P22 <-> P402 0.5846626 0.5846626 2.786856 0.18846847 0.014985015
## 11 P22 <-> P8 0.5367516 0.5367516 2.075791 0.13769034 0.073926074
## 12 P28 <-> P38 0.3112528 0.3112528 1.890076 0.13607382 0.125874126
## 13 P28 <-> P4 0.4491097 0.4491097 1.555452 0.11474734 0.205794206
## 14 P28 <-> P402 0.7441091 0.7441091 4.576834 0.27609821 0.008991009
## 15 P28 <-> P8 0.3637451 0.3637451 1.691859 0.11515622 0.141858142
## 16 P38 <-> P4 0.5849334 0.5849334 2.101714 0.14903960 0.074925075
## 17 P38 <-> P402 0.1958246 0.1958246 1.286951 0.09685828 0.288711289
## 18 P38 <-> P8 0.5459444 0.5459444 2.658231 0.16976574 0.006993007
## 19 P4 <-> P402 0.8145723 0.8145723 2.949033 0.19727249 0.002997003
## 20 P4 <-> P8 0.3713381 0.3713381 1.160826 0.08197447 0.369630370
## 21 P402 <-> P8 0.9373031 0.9373031 4.607175 0.26166464 0.000999001
## P.value.corrected
## 1 0.07226107
## 2 0.02937063
## 3 0.05506993
## 4 0.20847902
## 5 0.10279720
## 6 0.02937063
## 7 0.20847902
## 8 0.35454545
## 9 0.32241443
## 10 0.04495504
## 11 0.13111888
## 12 0.20333513
## 13 0.25421637
## 14 0.03146853
## 15 0.20847902
## 16 0.13111888
## 17 0.32241443
## 18 0.02937063
## 19 0.02937063
## 20 0.36963037
## 21 0.02097902
month comparisons
adonis.pair(vegdist(Mol16S_wide_simple),Mol16S_wide$Month)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 04 <-> 07 1.2408784 1.2408784 5.1724181 0.15136237 0.000999001
## 2 04 <-> 09 1.0716234 1.0716234 3.8210471 0.11297838 0.001998002
## 3 04 <-> 10 0.4741938 0.4741938 2.0336954 0.14491517 0.211788212
## 4 07 <-> 09 0.4805478 0.4805478 1.7206800 0.04685861 0.127872128
## 5 07 <-> 10 0.3403936 0.3403936 1.3913116 0.07565048 0.239760240
## 6 09 <-> 10 0.2248726 0.2248726 0.7208019 0.03850272 0.600399600
## P.value.corrected
## 1 0.005994006
## 2 0.005994006
## 3 0.287712288
## 4 0.255744256
## 5 0.287712288
## 6 0.600399600
Mol16S_NMDSmodel <- metaMDS(Mol16S_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.1420235
## Run 1 stress 0.1487572
## Run 2 stress 0.1372033
## ... New best solution
## ... Procrustes: rmse 0.09545086 max resid 0.3450041
## Run 3 stress 0.1446668
## Run 4 stress 0.1516894
## Run 5 stress 0.1364151
## ... New best solution
## ... Procrustes: rmse 0.0676417 max resid 0.334001
## Run 6 stress 0.1382545
## Run 7 stress 0.1321018
## ... New best solution
## ... Procrustes: rmse 0.09725584 max resid 0.358806
## Run 8 stress 0.1575945
## Run 9 stress 0.1496615
## Run 10 stress 0.1398927
## Run 11 stress 0.1416548
## Run 12 stress 0.141882
## Run 13 stress 0.1494222
## Run 14 stress 0.1409771
## Run 15 stress 0.1353404
## Run 16 stress 0.1341848
## Run 17 stress 0.1380038
## Run 18 stress 0.1381198
## Run 19 stress 0.1407499
## Run 20 stress 0.1454678
## Run 21 stress 0.1412147
## Run 22 stress 0.1423261
## Run 23 stress 0.137248
## Run 24 stress 0.1447911
## Run 25 stress 0.144383
## Run 26 stress 0.1455127
## Run 27 stress 0.1416275
## Run 28 stress 0.148297
## Run 29 stress 0.1447304
## Run 30 stress 0.1459943
## Run 31 stress 0.1512126
## Run 32 stress 0.1521908
## Run 33 stress 0.1424321
## Run 34 stress 0.1484972
## Run 35 stress 0.1661877
## Run 36 stress 0.1421265
## Run 37 stress 0.1400125
## Run 38 stress 0.1447517
## Run 39 stress 0.1423512
## Run 40 stress 0.1472231
## Run 41 stress 0.1363783
## Run 42 stress 0.1419928
## Run 43 stress 0.1331822
## Run 44 stress 0.1397504
## Run 45 stress 0.1445812
## Run 46 stress 0.1435381
## Run 47 stress 0.1436104
## Run 48 stress 0.1422559
## Run 49 stress 0.1471506
## Run 50 stress 0.1418283
## Run 51 stress 0.1353389
## Run 52 stress 0.1575562
## Run 53 stress 0.138287
## Run 54 stress 0.1507768
## Run 55 stress 0.1438503
## Run 56 stress 0.1525115
## Run 57 stress 0.1433958
## Run 58 stress 0.1425744
## Run 59 stress 0.1397073
## Run 60 stress 0.1332761
## Run 61 stress 0.1412986
## Run 62 stress 0.1476887
## Run 63 stress 0.1394618
## Run 64 stress 0.146797
## Run 65 stress 0.1418524
## Run 66 stress 0.151318
## Run 67 stress 0.1380029
## Run 68 stress 0.1412927
## Run 69 stress 0.1456148
## Run 70 stress 0.1412924
## Run 71 stress 0.1408664
## Run 72 stress 0.1353389
## Run 73 stress 0.136697
## Run 74 stress 0.1433208
## Run 75 stress 0.1424781
## Run 76 stress 0.1532513
## Run 77 stress 0.1477871
## Run 78 stress 0.133277
## Run 79 stress 0.1400122
## Run 80 stress 0.1773601
## Run 81 stress 0.1438078
## Run 82 stress 0.1379331
## Run 83 stress 0.1353391
## Run 84 stress 0.1352969
## Run 85 stress 0.136009
## Run 86 stress 0.1473051
## Run 87 stress 0.1353392
## Run 88 stress 0.1435408
## Run 89 stress 0.1364491
## Run 90 stress 0.1371089
## Run 91 stress 0.1379339
## Run 92 stress 0.1472542
## Run 93 stress 0.1689747
## Run 94 stress 0.1431744
## Run 95 stress 0.1382259
## Run 96 stress 0.1422429
## Run 97 stress 0.1451987
## Run 98 stress 0.1418413
## Run 99 stress 0.141887
## Run 100 stress 0.1391502
## Run 101 stress 0.1404966
## Run 102 stress 0.1465317
## Run 103 stress 0.138387
## Run 104 stress 0.1444182
## Run 105 stress 0.1404098
## Run 106 stress 0.1488803
## Run 107 stress 0.1477507
## Run 108 stress 0.1379817
## Run 109 stress 0.1321025
## ... Procrustes: rmse 0.001225722 max resid 0.006270148
## ... Similar to previous best
## *** Best solution repeated 1 times
Mol16S_NMDSmodel
##
## Call:
## metaMDS(comm = Mol16S_wide_simple, distance = "bray", trymax = 999)
##
## global Multidimensional Scaling using monoMDS
##
## Data: Mol16S_wide_simple
## Distance: bray
##
## Dimensions: 2
## Stress: 0.1321018
## Stress type 1, weak ties
## Best solution was repeated 1 time in 109 tries
## The best solution was from try 7 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'Mol16S_wide_simple'
get datascores
Mol16S.data.scores<-scores(Mol16S_NMDSmodel,display="sites")
Mol16S.data.scores<- as.data.frame(Mol16S.data.scores)
add environmental variables back in
# Extract the first 5 columns from df1
first_5_columns <- Mol16S_wide[, 1:4]
# Combine the first 5 columns from df1 with df2
Mol16S.data.scores <- cbind(first_5_columns, Mol16S.data.scores)
plot datascores
xx <- ggplot(Mol16S.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
xx
extract hulls
unique(Mol16S.data.scores$station)
## [1] P12 P22 P28 P38 P402 P4 P8
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- Mol16S.data.scores[Mol16S.data.scores$station == "P12", ][chull(Mol16S.data.scores[Mol16S.data.scores$station == "P12", c("NMDS1", "NMDS2")]), ]
P22 <- Mol16S.data.scores[Mol16S.data.scores$station == "P22", ][chull(Mol16S.data.scores[Mol16S.data.scores$station == "P22", c("NMDS1", "NMDS2")]), ]
P28 <- Mol16S.data.scores[Mol16S.data.scores$station == "P28", ][chull(Mol16S.data.scores[Mol16S.data.scores$station == "P28", c("NMDS1", "NMDS2")]), ]
P38 <- Mol16S.data.scores[Mol16S.data.scores$station == "P38", ][chull(Mol16S.data.scores[Mol16S.data.scores$station == "P38", c("NMDS1", "NMDS2")]), ]
P4 <- Mol16S.data.scores[Mol16S.data.scores$station == "P4", ][chull(Mol16S.data.scores[Mol16S.data.scores$station == "P4", c("NMDS1", "NMDS2")]), ]
P402 <- Mol16S.data.scores[Mol16S.data.scores$station == "P402", ][chull(Mol16S.data.scores[Mol16S.data.scores$station == "P402", c("NMDS1", "NMDS2")]), ]
P8 <- Mol16S.data.scores[Mol16S.data.scores$station == "P8", ][chull(Mol16S.data.scores[Mol16S.data.scores$station == "P8", c("NMDS1", "NMDS2")]), ]
get hull data
hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8) #combine grp.a and grp.b
hull.data
## sample Year Month station NMDS1 NMDS2
## 1 1804P12Ob 18 04 P12 -0.60581099 -1.016735934
## 45 2009P12Ob 20 09 P12 -1.09573621 0.156920817
## 26 1907P12Ob 19 07 P12 -1.09573621 0.156920817
## 8 1807P12Ob 18 07 P12 -1.09573621 0.156920817
## 33 1909P12Ob 19 09 P12 -0.27812313 0.758923125
## 13 1809P12Ob 18 09 P12 1.04468969 0.563959359
## 2 1804P22Ob 18 04 P22 1.64762904 -0.369985349
## 46 2009P22Ob 20 09 P22 -0.42126733 0.164997664
## 52 2010P22Ob 20 10 P22 0.01754380 0.729211108
## 14 1809P22Ob 18 09 P22 0.43615776 0.522299965
## 3 1804P28Ob 18 04 P28 0.87169812 -0.420405215
## 35 1909P28Ob 19 09 P28 0.72774179 -0.620931643
## 9 1807P28Ob 18 07 P28 -0.45120819 0.305980045
## 28 1907P28Ob 19 07 P28 0.34931289 0.186183521
## 15 1809P28Ob 18 09 P28 0.42679200 0.132118635
## 4 1804P38Ob 18 04 P38 0.58249081 0.008958916
## 48 2009P38Ob 20 09 P38 0.21885311 -0.084867714
## 22 1904P38Ob 19 04 P38 0.03445329 -0.055005676
## 41 2007P38Ob 20 07 P38 -0.15209743 0.059778258
## 29 1907P38Ob 19 07 P38 -0.45550326 0.372213385
## 36 1909P38Ob 19 09 P38 0.19644808 0.860695144
## 5 1804P402Ob 18 04 P402 -0.07954775 0.012289395
## 23 1904P402Ob 19 04 P402 -0.68405829 -0.049143775
## 49 2009P402Ob 20 09 P402 -0.48394532 0.471275917
## 17 1809P402Ob 18 09 P402 -0.18989465 0.907442989
## 30 1907P402Ob 19 07 P402 0.09962929 0.707443731
## 6 1804P4Ob 18 04 P4 0.72774179 -0.620931643
## 31 1907P4Ob 19 07 P4 0.34677570 -0.860880613
## 11 1807P4Ob 18 07 P4 -2.30613418 -0.832022499
## 50 2009P4Ob 20 09 P4 -1.23777925 -0.190676456
## 43 2007P4Ob 20 07 P4 -0.18408763 0.365516922
## 18 1809P4Ob 18 09 P4 1.04468969 0.563959359
## 19 1809P8Ob 18 09 P8 0.18301642 -1.729139172
## 51 2009P8Ob 20 09 P8 -0.36527849 -0.566452149
## 12 1807P8Ob 18 07 P8 -0.17101645 -0.293822299
## 44 2007P8Ob 20 07 P8 -0.07366711 -0.253652622
## 25 1904P8Ob 19 04 P8 0.40903049 -0.220298734
## 7 1804P8Ob 18 04 P8 0.87169812 -0.420405215
vf <- envfit(Mol16S_NMDSmodel, Mol16S_wide, perm = 999)
spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)
# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
## NMDS1 NMDS2 Species Pvalues R_squared
## Lacuna vincta 0.5425525 -0.7113848 Lacuna vincta 0.001 0.3093587
## Littorina plena -0.8859512 0.2040507 Littorina plena 0.001 0.3194518
## Mya arenaria -0.7962286 -0.4204621 Mya arenaria 0.003 0.3133538
Mol16S_Species_plot <- ggplot(Mol16S.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
geom_segment(data = spp.scrs,size=0.4,
aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+
geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
annotate("text", label = "2D Stress: 0.135", x = -0.55, y = 0.8, size = 5, colour = "black")+
geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
size = 3, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.16, "lines"), label.size = 0.05)
Mol16S_Species_plot
save plot
setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "Mol16S_NMDS_oblique.png", plot = Mol16S_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)
Remove non-zooplanton phyla
unique(M28S_counts$Phylum)
## [1] "Arthropoda" NA "Cnidaria"
## [4] "Dinophyceae__p" "Raphidophyceae__p" "Bacillariophyta"
## [7] "Chordata" "Ctenophora" "Chytridiomycota"
## [10] "Annelida" "Streptophyta" "Acantharea__p"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p','Raphidophyceae__p','Chytridiomycota','Acantharea__p')
# Remove those levels and drop them from the factor
M28S_counts <- droplevels(M28S_counts[!M28S_counts$Phylum %in% levels_to_remove,])
remove non-species rows
M28S_comp<- M28S_counts[complete.cases(M28S_counts), ]
convert to presence-absence
M28S_comp_pa<-M28S_comp %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
remove excess columns
M28S_comp_pa <- M28S_comp_pa[ -c(1:7) ]
wide to long format
M28S_long<-melt(M28S_comp_pa, id.vars=c("Species"))
add up multiple lines per station and convert back to presence-absence
M28S_long_sum <- M28S_long %>%
group_by(Species,variable) %>%
summarise(
value = sum(value))
M28S_long_sum<-M28S_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
split code string up to extract metadata
M28S_long_sum$variable<-str_split_fixed(M28S_long_sum$variable,"_",4)
remove samples of non-interest
unique(M28S_long_sum$variable[,3])
## [1] "P12" "P22" "P28" "P38" "P402" "P4" "P8" "CA042" "P105"
## [10] "P123" "P132" "P136" "P381" "P7" ""
unique(M28S_long_sum$variable[,2])
## [1] "1804" "1806" "1807" "1809" "1904" "1907"
## [7] "1909" "2007" "2009" "2010" "Control1" "Control2"
#remove excess stations
levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
M28S_long_sum <- droplevels(M28S_long_sum[!M28S_long_sum$variable[,3] %in% levels_to_remove,])
split up replicate columns
unique(M28S_long_sum$variable[,4])
## [1] "Ob" "Ve" "ObB" "VeA" "ObA" "VeB"
M28S_long_sum <- M28S_long_sum %>%
mutate(
tow_type = substr(variable[,4], 1, 2),
replicate = substr(variable[,4], 3, nchar(variable[,4]))
)
split up year and month columns
M28S_long_sum <- M28S_long_sum %>%
mutate(
Year = substr(variable[,2], 1, 2),
Month = substr(variable[,2], 3, 4)
)
make station column
M28S_long_sum$station<-M28S_long_sum$variable[,3]
remove vertical samples
M28S_long_sum<-subset(M28S_long_sum,tow_type=="Ob")
merge replicates (and oblique and vertical tows)
colnames(M28S_long_sum)
## [1] "Species" "variable" "value" "tow_type" "replicate" "Year"
## [7] "Month" "station"
M28S_long_sum$sample<-paste(M28S_long_sum$Year,M28S_long_sum$Month,M28S_long_sum$station,M28S_long_sum$tow_type,sep="")
M28S_long_sum <- M28S_long_sum %>%
group_by(Year,Month,station,sample,Species) %>%
summarise(
value = sum(value))
#convert back to presence-absence
M28S_long_sum<-M28S_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
back to wide format
M28S_wide<-dcast(M28S_long_sum, sample+Year+Month+station ~ Species )
remove any columns with all zeroes
target_cols <- 6:ncol(M28S_wide)
non_zero_cols <- target_cols[colSums(M28S_wide[, target_cols]) != 0]
M28S_wide <- M28S_wide[, c(1:5, non_zero_cols)]
remove environmental data
M28S_wide_simple<-M28S_wide[ , 5:31]
Make environmental columns categorical
# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")
# Convert the specified columns to factors
M28S_wide <- M28S_wide %>%
mutate_at(vars(one_of(cols_to_factor)), as.factor)
dist<-vegdist(M28S_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=M28S_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist ~ station * Month, data = M28S_wide, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## station 6 2.3250 0.34311 5.7420 0.001 ***
## Month 3 0.7960 0.11748 3.9320 0.001 ***
## station:Month 13 1.6307 0.24064 1.8587 0.004 **
## Residual 30 2.0245 0.29877
## Total 52 6.7762 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
station comparisons
adonis.pair(vegdist(M28S_wide_simple),M28S_wide$station)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 P12 <-> P22 0.66804033 0.66804033 8.9961187 0.39120161 0.000999001
## 2 P12 <-> P28 0.38107987 0.38107987 3.9205066 0.21877209 0.003996004
## 3 P12 <-> P38 0.25618672 0.25618672 2.2707796 0.14870096 0.085914086
## 4 P12 <-> P4 0.14186670 0.14186670 1.1740844 0.08283318 0.329670330
## 5 P12 <-> P402 0.21512395 0.21512395 1.7660925 0.11960459 0.158841159
## 6 P12 <-> P8 0.39488422 0.39488422 4.0948720 0.22630014 0.004995005
## 7 P22 <-> P28 0.60906136 0.60906136 10.2443032 0.42254476 0.001998002
## 8 P22 <-> P38 0.46806990 0.46806990 6.4859162 0.33285149 0.000999001
## 9 P22 <-> P4 0.46209322 0.46209322 5.7631948 0.30715424 0.001998002
## 10 P22 <-> P402 1.13535161 1.13535161 13.9897184 0.51833510 0.000999001
## 11 P22 <-> P8 0.24452909 0.24452909 4.1667497 0.22936132 0.018981019
## 12 P28 <-> P38 0.07035161 0.07035161 0.7262107 0.05290686 0.586413586
## 13 P28 <-> P4 0.21260055 0.21260055 2.0269321 0.13488662 0.098901099
## 14 P28 <-> P402 0.55152214 0.55152214 5.2097258 0.28609578 0.003996004
## 15 P28 <-> P8 0.22539405 0.22539405 2.7612084 0.16473803 0.043956044
## 16 P38 <-> P4 0.16748934 0.16748934 1.3678523 0.10232401 0.228771229
## 17 P38 <-> P402 0.34334978 0.34334978 2.7800615 0.18809539 0.032967033
## 18 P38 <-> P8 0.17422272 0.17422272 1.8139129 0.12244657 0.119880120
## 19 P4 <-> P402 0.39829857 0.39829857 3.0131891 0.20070280 0.007992008
## 20 P4 <-> P8 0.12016979 0.12016979 1.1548021 0.08158377 0.357642358
## 21 P402 <-> P8 0.86765199 0.86765199 8.2604389 0.38853567 0.000999001
## P.value.corrected
## 1 0.005244755
## 2 0.010489510
## 3 0.128871129
## 4 0.364372470
## 5 0.196215549
## 6 0.011655012
## 7 0.006993007
## 8 0.005244755
## 9 0.006993007
## 10 0.005244755
## 11 0.036236491
## 12 0.586413586
## 13 0.138461538
## 14 0.010489510
## 15 0.071005917
## 16 0.266899767
## 17 0.057692308
## 18 0.157342657
## 19 0.016783217
## 20 0.375524476
## 21 0.005244755
month comparisons
adonis.pair(vegdist(M28S_wide_simple),M28S_wide$Month)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 04 <-> 07 0.246317314 0.246317314 2.3020106 0.07126524 0.06893107
## 2 04 <-> 09 0.330966201 0.330966201 2.5518732 0.07839405 0.03896104
## 3 04 <-> 10 0.063778296 0.063778296 0.5790068 0.04263985 0.71328671
## 4 07 <-> 09 0.413323900 0.413323900 3.2054987 0.08176146 0.01298701
## 5 07 <-> 10 0.095928787 0.095928787 0.8349194 0.04209341 0.55844156
## 6 09 <-> 10 0.003497279 0.003497279 0.0232024 0.00121969 0.98701299
## P.value.corrected
## 1 0.13786214
## 2 0.11688312
## 3 0.85594406
## 4 0.07792208
## 5 0.83766234
## 6 0.98701299
M28S_NMDSmodel <- metaMDS(M28S_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.206974
## Run 1 stress 0.2290133
## Run 2 stress 0.2248491
## Run 3 stress 0.2438311
## Run 4 stress 0.2371662
## Run 5 stress 0.2304036
## Run 6 stress 0.2226275
## Run 7 stress 0.2317371
## Run 8 stress 0.223941
## Run 9 stress 0.2352705
## Run 10 stress 0.2345652
## Run 11 stress 0.2344381
## Run 12 stress 0.2298158
## Run 13 stress 0.2282033
## Run 14 stress 0.2222865
## Run 15 stress 0.229962
## Run 16 stress 0.2291709
## Run 17 stress 0.208272
## Run 18 stress 0.2212705
## Run 19 stress 0.2292452
## Run 20 stress 0.2331123
## Run 21 stress 0.2295625
## Run 22 stress 0.2267182
## Run 23 stress 0.2165566
## Run 24 stress 0.2227241
## Run 25 stress 0.2082718
## Run 26 stress 0.2315231
## Run 27 stress 0.2271305
## Run 28 stress 0.2172864
## Run 29 stress 0.2305348
## Run 30 stress 0.2393786
## Run 31 stress 0.2314914
## Run 32 stress 0.2218763
## Run 33 stress 0.2176189
## Run 34 stress 0.2184809
## Run 35 stress 0.2340489
## Run 36 stress 0.2207368
## Run 37 stress 0.2221219
## Run 38 stress 0.2377905
## Run 39 stress 0.2268368
## Run 40 stress 0.2179231
## Run 41 stress 0.2219736
## Run 42 stress 0.2325035
## Run 43 stress 0.2381708
## Run 44 stress 0.2345359
## Run 45 stress 0.216371
## Run 46 stress 0.2326738
## Run 47 stress 0.2240677
## Run 48 stress 0.2300943
## Run 49 stress 0.2267939
## Run 50 stress 0.246557
## Run 51 stress 0.2137034
## Run 52 stress 0.2287888
## Run 53 stress 0.2109735
## Run 54 stress 0.2123302
## Run 55 stress 0.227375
## Run 56 stress 0.2154465
## Run 57 stress 0.2302811
## Run 58 stress 0.2389569
## Run 59 stress 0.2293788
## Run 60 stress 0.2193683
## Run 61 stress 0.2335544
## Run 62 stress 0.2263505
## Run 63 stress 0.2257863
## Run 64 stress 0.2195394
## Run 65 stress 0.2411559
## Run 66 stress 0.2217801
## Run 67 stress 0.2343708
## Run 68 stress 0.2232186
## Run 69 stress 0.2301361
## Run 70 stress 0.2165561
## Run 71 stress 0.2267872
## Run 72 stress 0.2232597
## Run 73 stress 0.2314402
## Run 74 stress 0.21562
## Run 75 stress 0.244776
## Run 76 stress 0.216563
## Run 77 stress 0.2123383
## Run 78 stress 0.206974
## ... New best solution
## ... Procrustes: rmse 2.041027e-05 max resid 7.441322e-05
## ... Similar to previous best
## *** Best solution repeated 1 times
M28S_NMDSmodel
##
## Call:
## metaMDS(comm = M28S_wide_simple, distance = "bray", trymax = 999)
##
## global Multidimensional Scaling using monoMDS
##
## Data: M28S_wide_simple
## Distance: bray
##
## Dimensions: 2
## Stress: 0.206974
## Stress type 1, weak ties
## Best solution was repeated 1 time in 78 tries
## The best solution was from try 78 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'M28S_wide_simple'
get datascores
M28S.data.scores<-scores(M28S_NMDSmodel,display="sites")
M28S.data.scores<- as.data.frame(M28S.data.scores)
add environmental variables back in
# Extract the first 5 columns from df1
first_5_columns <- M28S_wide[, 1:4]
# Combine the first 5 columns from df1 with df2
M28S.data.scores <- cbind(first_5_columns, M28S.data.scores)
plot datascores
xx <- ggplot(M28S.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
xx
extract hulls
unique(M28S.data.scores$station)
## [1] P12 P22 P28 P38 P402 P4 P8
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- M28S.data.scores[M28S.data.scores$station == "P12", ][chull(M28S.data.scores[M28S.data.scores$station == "P12", c("NMDS1", "NMDS2")]), ]
P22 <- M28S.data.scores[M28S.data.scores$station == "P22", ][chull(M28S.data.scores[M28S.data.scores$station == "P22", c("NMDS1", "NMDS2")]), ]
P28 <- M28S.data.scores[M28S.data.scores$station == "P28", ][chull(M28S.data.scores[M28S.data.scores$station == "P28", c("NMDS1", "NMDS2")]), ]
P38 <- M28S.data.scores[M28S.data.scores$station == "P38", ][chull(M28S.data.scores[M28S.data.scores$station == "P38", c("NMDS1", "NMDS2")]), ]
P4 <- M28S.data.scores[M28S.data.scores$station == "P4", ][chull(M28S.data.scores[M28S.data.scores$station == "P4", c("NMDS1", "NMDS2")]), ]
P402 <- M28S.data.scores[M28S.data.scores$station == "P402", ][chull(M28S.data.scores[M28S.data.scores$station == "P402", c("NMDS1", "NMDS2")]), ]
P8 <- M28S.data.scores[M28S.data.scores$station == "P8", ][chull(M28S.data.scores[M28S.data.scores$station == "P8", c("NMDS1", "NMDS2")]), ]
get hull data
hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8) #combine grp.a and grp.b
hull.data
## sample Year Month station NMDS1 NMDS2
## 1 1804P12Ob 18 04 P12 -0.40986184 -0.1290208
## 38 2007P12Ob 20 07 P12 -0.58699048 -0.1697800
## 20 1904P12Ob 19 04 P12 -0.35714594 0.3909733
## 45 2009P12Ob 20 09 P12 0.42777001 0.6532272
## 33 1909P12Ob 19 09 P12 0.27269307 0.2664957
## 21 1904P22Ob 19 04 P22 0.67389337 -0.1951536
## 2 1804P22Ob 18 04 P22 0.67547683 -0.2889490
## 52 2010P22Ob 20 10 P22 0.07946482 -0.1783487
## 27 1907P22Ob 19 07 P22 0.46294234 0.1074290
## 14 1809P22Ob 18 09 P22 0.66615882 -0.1359921
## 3 1804P28Ob 18 04 P28 0.38197209 -0.2929012
## 35 1909P28Ob 19 09 P28 0.18965078 -0.3667978
## 15 1809P28Ob 18 09 P28 -0.54731643 -0.5272428
## 53 2010P28Ob 20 10 P28 -0.09360909 0.6971442
## 29 1907P38Ob 19 07 P38 0.10813916 -0.3601671
## 16 1809P38Ob 18 09 P38 -0.93900042 -0.4892924
## 36 1909P38Ob 19 09 P38 0.34464840 0.4419093
## 22 1904P38Ob 19 04 P38 0.24033626 0.0120023
## 5 1804P402Ob 18 04 P402 -0.11423793 -0.3205777
## 30 1907P402Ob 19 07 P402 -0.71644022 -0.2605243
## 17 1809P402Ob 18 09 P402 -0.98235325 0.3815895
## 10 1807P402Ob 18 07 P402 -0.48645732 0.6791813
## 31 1907P4Ob 19 07 P4 -0.29777479 -0.5467845
## 43 2007P4Ob 20 07 P4 -0.38905531 -0.2722517
## 11 1807P4Ob 18 07 P4 -0.48641599 0.4505654
## 50 2009P4Ob 20 09 P4 0.30877591 0.6145633
## 18 1809P4Ob 18 09 P4 0.53827349 0.3163899
## 7 1804P8Ob 18 04 P8 0.55250013 -0.2150411
## 51 2009P8Ob 20 09 P8 -0.02262388 -0.6283247
## 19 1809P8Ob 18 09 P8 0.08526749 0.6703768
vf <- envfit(M28S_NMDSmodel, M28S_wide, perm = 999)
spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)
# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
## NMDS1 NMDS2 Species Pvalues
## Calanus finmarchicus 0.6908220 0.27153448 Calanus finmarchicus 0.001
## Calanus glacialis 0.7500000 0.07339109 Calanus glacialis 0.001
## Eucalanus bungii 0.6892136 0.01861204 Eucalanus bungii 0.001
## Metridia lucens 0.5611232 -0.11401179 Metridia lucens 0.001
## Paracalanus parvus -0.6166596 -0.41149828 Paracalanus parvus 0.001
## Paraeuchaeta elongata 0.0460038 0.61022418 Paraeuchaeta elongata 0.001
## Paraeuchaeta russelli 0.2439038 0.55168908 Paraeuchaeta russelli 0.001
## Pseudocalanus minutus 0.6065093 -0.44833970 Pseudocalanus minutus 0.001
## Pseudocalanus newmani 0.3517803 -0.58327905 Pseudocalanus newmani 0.001
## R_squared
## Calanus finmarchicus 0.5215867
## Calanus glacialis 0.5376047
## Eucalanus bungii 0.4500139
## Metridia lucens 0.3103755
## Paracalanus parvus 0.5202935
## Paraeuchaeta elongata 0.3545209
## Paraeuchaeta russelli 0.3444483
## Pseudocalanus minutus 0.5385284
## Pseudocalanus newmani 0.4392237
M28S_Species_plot <- ggplot(M28S.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
geom_segment(data = spp.scrs,size=0.4,
aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+
geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
annotate("text", label = "2D Stress: 0.205", x = 0.6, y = 0.8, size = 5, colour = "black")+
geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
size = 3, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.16, "lines"), label.size = 0.05)
M28S_Species_plot
save plot
setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "M28S_NMDS_oblique.png", plot = M28S_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)
Remove non-zooplanton phyla
unique(Machida18S_counts$Phylum)
## [1] "Arthropoda" "Cnidaria" "Bacillariophyta"
## [4] "Mollusca" "Ctenophora" "Chaetognatha"
## [7] NA "Annelida" "Dinophyceae__p"
## [10] "Nemertea" "Chordata" "Ciliophora"
## [13] "Oomycota" "Acantharea__p" "Bryozoa"
## [16] "Echinodermata" "Apicomplexa" "Platyhelminthes"
## [19] "Chlorophyta" "Synurophyceae__p" "Endomyxa"
## [22] "Cercozoa" "Streptophyta" "Ascomycota"
## [25] "Blastocladiomycota" "Bigyra__p" "Chytridiomycota"
## [28] "Nematoda" "Rotifera" "Phoronida"
## [31] "Polycystinea__p" "Choanoflagellata__p" "Cryptophyceae__p"
## [34] "Phaeophyceae__p" "Rhodophyta" "Raphidophyceae__p"
## [37] "Basidiomycota" "Pirsoniales__c__p" "Porifera"
## [40] "Ichthyosporea__p" "Colponemida__c__p" "Dictyochophyceae__p"
## [43] "Entoprocta"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p','Ciliophora','Acantharea__p','Apicomplexa','Synurophyceae__p','Endomyxa','Cercozoa','Bigyra__p','Chytridiomycota','Polycystinea__p','Choanoflagellata__p','Raphidophyceae__p','Pirsoniales__c__p','Colponemida__c__p','Ichthyosporea__p')
# Remove those levels and drop them from the factor
Machida18S_counts <- droplevels(Machida18S_counts[!Machida18S_counts$Phylum %in% levels_to_remove,])
remove non-species rows
Machida18S_comp<- Machida18S_counts[complete.cases(Machida18S_counts), ]
convert to presence-absence
Machida18S_comp_pa<-Machida18S_comp %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
remove excess columns
Machida18S_comp_pa <- Machida18S_comp_pa[ -c(1:7) ]
wide to long format
Machida18S_long<-melt(Machida18S_comp_pa, id.vars=c("Species"))
add up multiple lines per station and convert back to presence-absence
Machida18S_long_sum <- Machida18S_long %>%
group_by(Species,variable) %>%
summarise(
value = sum(value))
Machida18S_long_sum<-Machida18S_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
split code string up to extract metadata
Machida18S_long_sum$variable<-str_split_fixed(Machida18S_long_sum$variable,"_",4)
remove samples of non-interest
unique(Machida18S_long_sum$variable[,3])
## [1] "P12" "P22" "P28" "P38" "P402" "P4" "P8" "CA042" "P105"
## [10] "P123" "P132" "P136" "P381" "P7" ""
unique(Machida18S_long_sum$variable[,2])
## [1] "1804" "1806" "1807" "1809" "1904" "1907"
## [7] "1909" "2007" "2009" "2010" "Control1"
#remove excess stations
levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
Machida18S_long_sum <- droplevels(Machida18S_long_sum[!Machida18S_long_sum$variable[,3] %in% levels_to_remove,])
split up replicate columns
unique(Machida18S_long_sum$variable[,4])
## [1] "Ob" "Ve" "ObB" "VeA" "ObA" "VeB"
Machida18S_long_sum <- Machida18S_long_sum %>%
mutate(
tow_type = substr(variable[,4], 1, 2),
replicate = substr(variable[,4], 3, nchar(variable[,4]))
)
split up year and month columns
Machida18S_long_sum <- Machida18S_long_sum %>%
mutate(
Year = substr(variable[,2], 1, 2),
Month = substr(variable[,2], 3, 4)
)
make station column
Machida18S_long_sum$station<-Machida18S_long_sum$variable[,3]
remove vertical samples
Machida18S_long_sum<-subset(Machida18S_long_sum,tow_type=="Ob")
merge replicates (and oblique and vertical tows)
colnames(Machida18S_long_sum)
## [1] "Species" "variable" "value" "tow_type" "replicate" "Year"
## [7] "Month" "station"
Machida18S_long_sum$sample<-paste(Machida18S_long_sum$Year,Machida18S_long_sum$Month,Machida18S_long_sum$station,Machida18S_long_sum$tow_type,sep="")
Machida18S_long_sum <- Machida18S_long_sum %>%
group_by(Year,Month,station,sample,Species) %>%
summarise(
value = sum(value))
#convert back to presence-absence
Machida18S_long_sum<-Machida18S_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
back to wide format
Machida18S_wide<-dcast(Machida18S_long_sum, sample+Year+Month+station ~ Species )
remove any columns with all zeroes
target_cols <- 6:ncol(Machida18S_wide)
non_zero_cols <- target_cols[colSums(Machida18S_wide[, target_cols]) != 0]
Machida18S_wide <- Machida18S_wide[, c(1:5, non_zero_cols)]
remove environmental data
Machida18S_wide_simple<-Machida18S_wide[ , 5:110]
Make environmental columns categorical
# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")
# Convert the specified columns to factors
Machida18S_wide <- Machida18S_wide %>%
mutate_at(vars(one_of(cols_to_factor)), as.factor)
dist<-vegdist(Machida18S_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=Machida18S_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist ~ station * Month, data = Machida18S_wide, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## station 6 1.9876 0.27114 4.0876 0.001 ***
## Month 3 1.0171 0.13875 4.1834 0.001 ***
## station:Month 13 1.8135 0.24738 1.7213 0.001 ***
## Residual 31 2.5124 0.34272
## Total 53 7.3306 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
station comparisons
adonis.pair(vegdist(Machida18S_wide_simple),Machida18S_wide$station)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 P12 <-> P22 0.4050801 0.4050801 4.139886 0.2162963 0.002997003
## 2 P12 <-> P28 0.2007739 0.2007739 1.892205 0.1190650 0.094905095
## 3 P12 <-> P38 0.1829881 0.1829881 1.665385 0.1135589 0.107892108
## 4 P12 <-> P4 0.3075001 0.3075001 3.520660 0.2131065 0.014985015
## 5 P12 <-> P402 0.2593743 0.2593743 2.220451 0.1458860 0.090909091
## 6 P12 <-> P8 0.3705372 0.3705372 2.975489 0.1752815 0.007992008
## 7 P22 <-> P28 0.5258883 0.5258883 4.996480 0.2498680 0.001998002
## 8 P22 <-> P38 0.2543377 0.2543377 2.339965 0.1432050 0.009990010
## 9 P22 <-> P4 0.3209403 0.3209403 3.656737 0.2071015 0.004995005
## 10 P22 <-> P402 0.4478436 0.4478436 3.889834 0.2174326 0.000999001
## 11 P22 <-> P8 0.2003051 0.2003051 1.635844 0.0983325 0.113886114
## 12 P28 <-> P38 0.2074972 0.2074972 1.752214 0.1187763 0.106893107
## 13 P28 <-> P4 0.5734570 0.5734570 5.980720 0.3150945 0.001998002
## 14 P28 <-> P402 0.4835637 0.4835637 3.857579 0.2288335 0.006993007
## 15 P28 <-> P8 0.4488371 0.4488371 3.388413 0.1948662 0.010989011
## 16 P38 <-> P4 0.2629625 0.2629625 2.653015 0.1810559 0.008991009
## 17 P38 <-> P402 0.2718403 0.2718403 2.074418 0.1473893 0.053946054
## 18 P38 <-> P8 0.2823920 0.2823920 2.042451 0.1357791 0.054945055
## 19 P4 <-> P402 0.1970624 0.1970624 1.848089 0.1334544 0.065934066
## 20 P4 <-> P8 0.3176350 0.3176350 2.744726 0.1743267 0.017982018
## 21 P402 <-> P8 0.3964518 0.3964518 2.730469 0.1735784 0.003996004
## P.value.corrected
## 1 0.01573427
## 2 0.11072261
## 3 0.11328671
## 4 0.02622378
## 5 0.11072261
## 6 0.02097902
## 7 0.01398601
## 8 0.02097902
## 9 0.01748252
## 10 0.01398601
## 11 0.11388611
## 12 0.11328671
## 13 0.01398601
## 14 0.02097902
## 15 0.02097902
## 16 0.02097902
## 17 0.07692308
## 18 0.07692308
## 19 0.08653846
## 20 0.02904788
## 21 0.01678322
month comparisons
adonis.pair(vegdist(Machida18S_wide_simple),Machida18S_wide$Month)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 04 <-> 07 0.7291627 0.7291627 6.0598509 0.16351525 0.000999001
## 2 04 <-> 09 0.6448790 0.6448790 5.1233906 0.14586834 0.000999001
## 3 04 <-> 10 0.2278935 0.2278935 1.6661575 0.11360559 0.102897103
## 4 07 <-> 09 0.1587687 0.1587687 1.3215272 0.03448524 0.215784216
## 5 07 <-> 10 0.1520046 0.1520046 1.2422648 0.05848081 0.271728272
## 6 09 <-> 10 0.1019444 0.1019444 0.7768989 0.03928315 0.695304695
## P.value.corrected
## 1 0.002997003
## 2 0.002997003
## 3 0.205794206
## 4 0.323676324
## 5 0.326073926
## 6 0.695304695
Machida18S_NMDSmodel <- metaMDS(Machida18S_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.2180912
## Run 1 stress 0.2327872
## Run 2 stress 0.2423508
## Run 3 stress 0.2205055
## Run 4 stress 0.2278572
## Run 5 stress 0.2190059
## Run 6 stress 0.2205049
## Run 7 stress 0.2192346
## Run 8 stress 0.23612
## Run 9 stress 0.2324939
## Run 10 stress 0.2192886
## Run 11 stress 0.2409283
## Run 12 stress 0.2246649
## Run 13 stress 0.2192514
## Run 14 stress 0.2308205
## Run 15 stress 0.2263327
## Run 16 stress 0.2309202
## Run 17 stress 0.2318796
## Run 18 stress 0.222232
## Run 19 stress 0.2361207
## Run 20 stress 0.2193056
## Run 21 stress 0.2542189
## Run 22 stress 0.2259888
## Run 23 stress 0.2184218
## ... Procrustes: rmse 0.01488169 max resid 0.06864917
## Run 24 stress 0.243502
## Run 25 stress 0.2195005
## Run 26 stress 0.2235743
## Run 27 stress 0.230459
## Run 28 stress 0.2257394
## Run 29 stress 0.2183715
## ... Procrustes: rmse 0.04204822 max resid 0.1824824
## Run 30 stress 0.2180912
## ... New best solution
## ... Procrustes: rmse 6.934386e-06 max resid 2.755266e-05
## ... Similar to previous best
## *** Best solution repeated 1 times
Machida18S_NMDSmodel
##
## Call:
## metaMDS(comm = Machida18S_wide_simple, distance = "bray", trymax = 999)
##
## global Multidimensional Scaling using monoMDS
##
## Data: Machida18S_wide_simple
## Distance: bray
##
## Dimensions: 2
## Stress: 0.2180912
## Stress type 1, weak ties
## Best solution was repeated 1 time in 30 tries
## The best solution was from try 30 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'Machida18S_wide_simple'
get datascores
Machida18S.data.scores<-scores(Machida18S_NMDSmodel,display="sites")
Machida18S.data.scores<- as.data.frame(Machida18S.data.scores)
add environmental variables back in
# Extract the first 5 columns from df1
first_5_columns <- Machida18S_wide[, 1:4]
# Combine the first 5 columns from df1 with df2
Machida18S.data.scores <- cbind(first_5_columns, Machida18S.data.scores)
plot datascores
xx <- ggplot(Machida18S.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
xx
extract hulls
unique(Machida18S.data.scores$station)
## [1] P12 P22 P28 P38 P402 P4 P8
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- Machida18S.data.scores[Machida18S.data.scores$station == "P12", ][chull(Machida18S.data.scores[Machida18S.data.scores$station == "P12", c("NMDS1", "NMDS2")]), ]
P22 <- Machida18S.data.scores[Machida18S.data.scores$station == "P22", ][chull(Machida18S.data.scores[Machida18S.data.scores$station == "P22", c("NMDS1", "NMDS2")]), ]
P28 <- Machida18S.data.scores[Machida18S.data.scores$station == "P28", ][chull(Machida18S.data.scores[Machida18S.data.scores$station == "P28", c("NMDS1", "NMDS2")]), ]
P38 <- Machida18S.data.scores[Machida18S.data.scores$station == "P38", ][chull(Machida18S.data.scores[Machida18S.data.scores$station == "P38", c("NMDS1", "NMDS2")]), ]
P4 <- Machida18S.data.scores[Machida18S.data.scores$station == "P4", ][chull(Machida18S.data.scores[Machida18S.data.scores$station == "P4", c("NMDS1", "NMDS2")]), ]
P402 <- Machida18S.data.scores[Machida18S.data.scores$station == "P402", ][chull(Machida18S.data.scores[Machida18S.data.scores$station == "P402", c("NMDS1", "NMDS2")]), ]
P8 <- Machida18S.data.scores[Machida18S.data.scores$station == "P8", ][chull(Machida18S.data.scores[Machida18S.data.scores$station == "P8", c("NMDS1", "NMDS2")]), ]
get hull data
hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8) #combine grp.a and grp.b
hull.data
## sample Year Month station NMDS1 NMDS2
## 34 1909P12Ob 19 09 P12 0.396359675 0.125076523
## 46 2009P12Ob 20 09 P12 0.254394872 0.077670176
## 21 1904P12Ob 19 04 P12 -0.245286359 0.003780224
## 1 1804P12Ob 18 04 P12 -0.532243523 0.078841202
## 8 1807P12Ob 18 07 P12 0.345500684 0.317835626
## 14 1809P12Ob 18 09 P12 0.423600451 0.162535854
## 40 2007P22Ob 20 07 P22 0.190290701 -0.212460323
## 22 1904P22Ob 19 04 P22 -0.029997322 -0.326957539
## 53 2010P22Ob 20 10 P22 -0.282008807 -0.366104613
## 9 1807P22Ob 18 07 P22 -0.613555424 -0.266440626
## 35 1909P22Ob 19 09 P22 -0.276765254 0.040710164
## 15 1809P22Ob 18 09 P22 0.005575514 0.047474321
## 28 1907P22Ob 19 07 P22 0.175423251 -0.068213265
## 54 2010P28Ob 20 10 P28 0.600577680 -0.345004985
## 3 1804P28Ob 18 04 P28 -0.555670306 -0.092517746
## 29 1907P28Ob 19 07 P28 0.324793574 0.225025384
## 16 1809P28Ob 18 09 P28 0.621521573 0.088959238
## 49 2009P38Ob 20 09 P38 0.332494764 -0.210046489
## 23 1904P38Ob 19 04 P38 -0.058909988 -0.385986346
## 4 1804P38Ob 18 04 P38 -0.578203417 -0.404042151
## 17 1809P38Ob 18 09 P38 0.082806686 0.234877848
## 30 1907P38Ob 19 07 P38 0.240963450 0.272320799
## 42 2007P38Ob 20 07 P38 0.357796622 0.067388329
## 50 2009P402Ob 20 09 P402 -0.147305217 -0.246814584
## 5 1804P402Ob 18 04 P402 -0.519839435 0.123072634
## 24 1904P402Ob 19 04 P402 -0.615468218 0.340963300
## 43 2007P402Ob 20 07 P402 -0.235711874 0.677489266
## 18 1809P402Ob 18 09 P402 0.564123705 0.245705800
## 6 1804P4Ob 18 04 P4 0.071300372 -0.423341177
## 12 1807P4Ob 18 07 P4 -0.341178655 0.219526125
## 19 1809P4Ob 18 09 P4 -0.193105990 0.287536229
## 44 2007P4Ob 20 07 P4 -0.141306727 0.200364184
## 45 2007P8Ob 20 07 P8 0.384467069 -0.469692110
## 52 2009P8Ob 20 09 P8 -0.007901423 -0.550965596
## 26 1904P8Ob 19 04 P8 -0.343502926 -0.461922624
## 7 1804P8Ob 18 04 P8 -0.884678930 -0.001719643
## 20 1809P8Ob 18 09 P8 -0.328277064 0.457654631
## 33 1907P8Ob 19 07 P8 0.399855511 -0.118875374
vf <- envfit(Machida18S_NMDSmodel, Machida18S_wide, perm = 999)
spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)
# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
## NMDS1 NMDS2 Species
## Clytia gracilis -0.51409831 -0.73203689 Clytia gracilis
## Diphyes chamissonis -0.77621015 0.32421305 Diphyes chamissonis
## Eutonina indicans -0.53380883 -0.55443704 Eutonina indicans
## Halopsis ocellata -0.64111045 -0.46727610 Halopsis ocellata
## Leuckartiara octona -0.67857013 0.68240750 Leuckartiara octona
## Limnocalanus macrurus -0.08241382 -0.71514202 Limnocalanus macrurus
## Membranipora membranacea -0.78344846 -0.01188514 Membranipora membranacea
## Mesochaetopterus taylori -0.61857557 0.39637049 Mesochaetopterus taylori
## Mitrella bicincta -0.71333849 -0.18729172 Mitrella bicincta
## Neotrypaea californiensis -0.09990835 0.75000000 Neotrypaea californiensis
## Oithona atlantica -0.30383367 0.64004812 Oithona atlantica
## Phoronopsis harmeri -0.62069482 0.58122300 Phoronopsis harmeri
## Pvalues R_squared
## Clytia gracilis 0.001 0.4932201
## Diphyes chamissonis 0.001 0.4361678
## Eutonina indicans 0.001 0.3651202
## Halopsis ocellata 0.001 0.3879373
## Leuckartiara octona 0.001 0.5708621
## Limnocalanus macrurus 0.001 0.3194259
## Membranipora membranacea 0.001 0.3784222
## Mesochaetopterus taylori 0.001 0.3326938
## Mitrella bicincta 0.001 0.3352732
## Neotrypaea californiensis 0.001 0.3528721
## Oithona atlantica 0.001 0.3094134
## Phoronopsis harmeri 0.001 0.4457009
Machida18S_Species_plot <- ggplot(Machida18S.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
geom_segment(data = spp.scrs,size=0.4,
aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+
geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
annotate("text", label = "2D Stress: 0.218", x =-0.6, y = 0.8, size = 5, colour = "black")+
geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
size = 3, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.16, "lines"), label.size = 0.05)
Machida18S_Species_plot
save plot
setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "Machida18S_NMDS_oblique.png", plot = Machida18S_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)
Remove non-zooplanton phyla
unique(Fishcytb_counts$Phylum)
## [1] NA "Chordata" "Arthropoda" "Mollusca"
## [5] "Annelida" "Echinodermata" "Pseudomonadota"
levels_to_remove <- c('Bacillariophyta', 'Rhodophyta',NA,'Oomycota','Ascomycota','Onychophora','Cryptophyceae__p','Zoopagomycota','Phaeophyceae__p','Haptophyta','Basidiomycota','Chlorophyta','Dinophyceae__p','Discosea','Tubulinea','Dictyochophyceae__p','Bacteroidota','Blastocladiomycota','Pseudomonadota','Chrysophyceae__p','Malawimonadida__c__p','Prasinodermophyta','Chrysoparadoxa__f__o__c__p','Apusomonadida__c__p','Streptophyta','Pelagophyceae__p','Eustigmatophyceae__p','Pseudomonadota')
# Remove those levels and drop them from the factor
Fishcytb_counts <- droplevels(Fishcytb_counts[!Fishcytb_counts$Phylum %in% levels_to_remove,])
subset to fish only
Fishcytb_counts<-subset(Fishcytb_counts,Class=="Actinopteri")
remove non-species rows
Fishcytb_comp<- Fishcytb_counts[complete.cases(Fishcytb_counts), ]
convert to presence-absence
Fishcytb_comp_pa<-Fishcytb_comp %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
remove excess columns
Fishcytb_comp_pa <- Fishcytb_comp_pa[ -c(1:7) ]
wide to long format
Fishcytb_long<-melt(Fishcytb_comp_pa, id.vars=c("Species"))
add up multiple lines per station and convert back to presence-absence
Fishcytb_long_sum <- Fishcytb_long %>%
group_by(Species,variable) %>%
summarise(
value = sum(value))
Fishcytb_long_sum<-Fishcytb_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
split code string up to extract metadata
Fishcytb_long_sum$variable<-str_split_fixed(Fishcytb_long_sum$variable,"_",4)
remove samples of non-interest
unique(Fishcytb_long_sum$variable[,3])
## [1] "P12" "P22" "P28" "P38" "P402" "P4" "P8" "CA042" "P105"
## [10] "P123" "P132" "P136" "P7" ""
unique(Fishcytb_long_sum$variable[,2])
## [1] "1804" "1806" "1807" "1809" "1904" "1907"
## [7] "1909" "2007" "2009" "2010" "Control1"
#remove excess stations
levels_to_remove <- c('CA042','',"P105","P123","P132","P136","P381","P7")
Fishcytb_long_sum <- droplevels(Fishcytb_long_sum[!Fishcytb_long_sum$variable[,3] %in% levels_to_remove,])
split up replicate columns
unique(Fishcytb_long_sum$variable[,4])
## [1] "Ob" "VeA" "Ve" "ObB" "ObA" "VeB"
Fishcytb_long_sum <- Fishcytb_long_sum %>%
mutate(
tow_type = substr(variable[,4], 1, 2),
replicate = substr(variable[,4], 3, nchar(variable[,4]))
)
split up year and month columns
Fishcytb_long_sum <- Fishcytb_long_sum %>%
mutate(
Year = substr(variable[,2], 1, 2),
Month = substr(variable[,2], 3, 4)
)
make station column
Fishcytb_long_sum$station<-Fishcytb_long_sum$variable[,3]
remove vertical samples
Fishcytb_long_sum<-subset(Fishcytb_long_sum,tow_type=="Ob")
merge replicates
colnames(Fishcytb_long_sum)
## [1] "Species" "variable" "value" "tow_type" "replicate" "Year"
## [7] "Month" "station"
Fishcytb_long_sum$sample<-paste(Fishcytb_long_sum$Year,Fishcytb_long_sum$Month,Fishcytb_long_sum$station,Fishcytb_long_sum$tow_type,sep="")
Fishcytb_long_sum <- Fishcytb_long_sum %>%
group_by(Year,Month,station,sample,Species) %>%
summarise(
value = sum(value))
#convert back to presence-absence
Fishcytb_long_sum<-Fishcytb_long_sum %>%
mutate(across(where(is.numeric), ~ifelse(.x >0, 1, 0)))
back to wide format
Fishcytb_wide<-dcast(Fishcytb_long_sum, sample+Year+Month+station ~ Species )
remove any columns with all zeroes
target_cols <- 6:ncol(Fishcytb_wide)
non_zero_cols <- target_cols[colSums(Fishcytb_wide[, target_cols]) != 0]
Fishcytb_wide <- Fishcytb_wide[, c(1:5, non_zero_cols)]
remove rows with all zeroes
row_sums <- rowSums(Fishcytb_wide[, 5:32])
# Create a logical vector to identify rows where the sum is not zero
non_zero_rows <- row_sums != 0
# Subset the data frame to keep only rows with a non-zero sum
Fishcytb_wide <- Fishcytb_wide[non_zero_rows, ]
remove environmental data
Fishcytb_wide_simple<-Fishcytb_wide[ , 5:32]
Make environmental columns categorical
# Specify the columns to convert to factors
cols_to_factor <- c("station", "Year", "Month")
# Convert the specified columns to factors
Fishcytb_wide <- Fishcytb_wide %>%
mutate_at(vars(one_of(cols_to_factor)), as.factor)
dist<-vegdist(Fishcytb_wide_simple, method='bray')
perm<-adonis2(dist~station*Month, data=Fishcytb_wide, permutations = 999, method="bray")
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist ~ station * Month, data = Fishcytb_wide, permutations = 999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## station 6 3.3596 0.23778 2.8840 0.001 ***
## Month 3 2.9107 0.20601 4.9973 0.001 ***
## station:Month 13 3.5873 0.25390 1.4213 0.030 *
## Residual 22 4.2714 0.30231
## Total 44 14.1289 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
station comparisons
adonis.pair(vegdist(Fishcytb_wide_simple),Fishcytb_wide$station)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 P12 <-> P22 1.2570176 1.2570176 4.4154240 0.26898020 0.005994006
## 2 P12 <-> P28 0.5265152 0.5265152 1.3982861 0.13447275 0.175824176
## 3 P12 <-> P38 0.8039701 0.8039701 2.4763880 0.18375755 0.059940060
## 4 P12 <-> P4 0.9286159 0.9286159 3.7590368 0.27320494 0.021978022
## 5 P12 <-> P402 0.6744163 0.6744163 2.7202768 0.19826691 0.037962038
## 6 P12 <-> P8 1.1848082 1.1848082 4.3954958 0.30533827 0.004995005
## 7 P22 <-> P28 0.4252049 0.4252049 1.2291927 0.10051299 0.321678322
## 8 P22 <-> P38 0.4431602 0.4431602 1.4448029 0.10002233 0.190809191
## 9 P22 <-> P4 0.3801767 0.3801767 1.5804422 0.11637634 0.144855145
## 10 P22 <-> P402 0.3797361 0.3797361 1.5704575 0.10778368 0.189810190
## 11 P22 <-> P8 0.7501007 0.7501007 2.8926427 0.19423300 0.028971029
## 12 P28 <-> P38 0.3269048 0.3269048 0.8254992 0.07625507 0.608391608
## 13 P28 <-> P4 0.5022366 0.5022366 1.5808821 0.14940929 0.110889111
## 14 P28 <-> P402 0.3671006 0.3671006 1.1781033 0.10539385 0.322677323
## 15 P28 <-> P8 0.2708882 0.2708882 0.7904289 0.08073486 0.681318681
## 16 P38 <-> P4 0.3747189 0.3747189 1.3551981 0.10968647 0.237762238
## 17 P38 <-> P402 0.1571306 0.1571306 0.5716717 0.04547300 0.751248751
## 18 P38 <-> P8 0.3378666 0.3378666 1.1377013 0.09373285 0.344655345
## 19 P4 <-> P402 0.3196842 0.3196842 1.6002412 0.12700084 0.192807193
## 20 P4 <-> P8 0.7789558 0.7789558 3.5965172 0.26451753 0.005994006
## 21 P402 <-> P8 0.5935853 0.5935853 2.6951672 0.19679695 0.027972028
## P.value.corrected
## 1 0.04195804
## 2 0.28921079
## 3 0.15734266
## 4 0.10139860
## 5 0.11388611
## 6 0.04195804
## 7 0.39860140
## 8 0.28921079
## 9 0.28921079
## 10 0.28921079
## 11 0.10139860
## 12 0.67243283
## 13 0.25874126
## 14 0.39860140
## 15 0.71538462
## 16 0.33286713
## 17 0.75124875
## 18 0.40209790
## 19 0.28921079
## 20 0.04195804
## 21 0.10139860
month comparisons
adonis.pair(vegdist(Fishcytb_wide_simple),Fishcytb_wide$Year)
## combination SumsOfSqs MeanSqs F.Model R2 P.value
## 1 18 <-> 19 0.3755977 0.3755977 1.298178 0.04587497 0.2447552
## 2 18 <-> 20 0.4379156 0.4379156 1.347149 0.04439128 0.2537463
## 3 19 <-> 20 0.5061051 0.5061051 1.530874 0.05183978 0.1598402
## P.value.corrected
## 1 0.2537463
## 2 0.2537463
## 3 0.2537463
Fishcytb_NMDSmodel <- metaMDS(Fishcytb_wide_simple, distance = "bray",trymax = 999)
## Run 0 stress 0.1583673
## Run 1 stress 0.1666955
## Run 2 stress 0.1766177
## Run 3 stress 0.1580672
## ... New best solution
## ... Procrustes: rmse 0.1102832 max resid 0.3243578
## Run 4 stress 0.1564601
## ... New best solution
## ... Procrustes: rmse 0.1066288 max resid 0.3252197
## Run 5 stress 0.1579185
## Run 6 stress 0.1560572
## ... New best solution
## ... Procrustes: rmse 0.09080185 max resid 0.3545808
## Run 7 stress 0.1605663
## Run 8 stress 0.1601261
## Run 9 stress 0.1496531
## ... New best solution
## ... Procrustes: rmse 0.1091031 max resid 0.3172181
## Run 10 stress 0.1484116
## ... New best solution
## ... Procrustes: rmse 0.09294735 max resid 0.3427064
## Run 11 stress 0.1588653
## Run 12 stress 0.1622118
## Run 13 stress 0.1566133
## Run 14 stress 0.1554879
## Run 15 stress 0.1536969
## Run 16 stress 0.1580743
## Run 17 stress 0.1596336
## Run 18 stress 0.1545446
## Run 19 stress 0.154086
## Run 20 stress 0.156618
## Run 21 stress 0.161681
## Run 22 stress 0.1581779
## Run 23 stress 0.169016
## Run 24 stress 0.158542
## Run 25 stress 0.1636469
## Run 26 stress 0.1496268
## Run 27 stress 0.1545171
## Run 28 stress 0.1502626
## Run 29 stress 0.1615822
## Run 30 stress 0.1527706
## Run 31 stress 0.1540708
## Run 32 stress 0.1850368
## Run 33 stress 0.1670124
## Run 34 stress 0.1653676
## Run 35 stress 0.1503019
## Run 36 stress 0.1536123
## Run 37 stress 0.1494774
## Run 38 stress 0.1497016
## Run 39 stress 0.1592069
## Run 40 stress 0.1643382
## Run 41 stress 0.1584598
## Run 42 stress 0.1516966
## Run 43 stress 0.1578278
## Run 44 stress 0.1631824
## Run 45 stress 0.1701022
## Run 46 stress 0.1524939
## Run 47 stress 0.1602812
## Run 48 stress 0.1609328
## Run 49 stress 0.156683
## Run 50 stress 0.148843
## ... Procrustes: rmse 0.06957941 max resid 0.3816475
## Run 51 stress 0.1546622
## Run 52 stress 0.1639194
## Run 53 stress 0.1570247
## Run 54 stress 0.1541572
## Run 55 stress 0.1582515
## Run 56 stress 0.1581801
## Run 57 stress 0.158738
## Run 58 stress 0.1509434
## Run 59 stress 0.1491922
## Run 60 stress 0.1597418
## Run 61 stress 0.1553132
## Run 62 stress 0.1616525
## Run 63 stress 0.1577165
## Run 64 stress 0.1664544
## Run 65 stress 0.1486091
## ... Procrustes: rmse 0.02503328 max resid 0.1078624
## Run 66 stress 0.1605558
## Run 67 stress 0.1487986
## ... Procrustes: rmse 0.06278361 max resid 0.3175811
## Run 68 stress 0.1565008
## Run 69 stress 0.1501814
## Run 70 stress 0.1579488
## Run 71 stress 0.1500097
## Run 72 stress 0.1520563
## Run 73 stress 0.1621642
## Run 74 stress 0.1578314
## Run 75 stress 0.1554758
## Run 76 stress 0.1541767
## Run 77 stress 0.1525381
## Run 78 stress 0.1607285
## Run 79 stress 0.1611361
## Run 80 stress 0.1676238
## Run 81 stress 0.1503132
## Run 82 stress 0.1480305
## ... New best solution
## ... Procrustes: rmse 0.01919301 max resid 0.1046834
## Run 83 stress 0.1662694
## Run 84 stress 0.1517157
## Run 85 stress 0.1480314
## ... Procrustes: rmse 0.0003021542 max resid 0.001639335
## ... Similar to previous best
## *** Best solution repeated 1 times
Fishcytb_NMDSmodel
##
## Call:
## metaMDS(comm = Fishcytb_wide_simple, distance = "bray", trymax = 999)
##
## global Multidimensional Scaling using monoMDS
##
## Data: Fishcytb_wide_simple
## Distance: bray
##
## Dimensions: 2
## Stress: 0.1480305
## Stress type 1, weak ties
## Best solution was repeated 1 time in 85 tries
## The best solution was from try 82 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'Fishcytb_wide_simple'
get datascores
Fishcytb.data.scores<-scores(Fishcytb_NMDSmodel,display="sites")
Fishcytb.data.scores<- as.data.frame(Fishcytb.data.scores)
add environmental variables back in
# Extract the first 5 columns from df1
first_5_columns <- Fishcytb_wide[, 1:4]
# Combine the first 5 columns from df1 with df2
Fishcytb.data.scores <- cbind(first_5_columns, Fishcytb.data.scores)
plot datascores
xx <- ggplot(Fishcytb.data.scores, aes(x = NMDS1, y = NMDS2,colour = station)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))
xx
for the plot I had to zoom way in, because there were 3 outliers. I
think there isn’t enough data for nmds here
extract hulls
unique(Fishcytb.data.scores$station)
## [1] P12 P22 P28 P38 P402 P4 P8
## Levels: P12 P22 P28 P38 P4 P402 P8
P12 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P12", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station == "P12", c("NMDS1", "NMDS2")]), ]
P22 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P22", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station == "P22", c("NMDS1", "NMDS2")]), ]
P28 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P28", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station == "P28", c("NMDS1", "NMDS2")]), ]
P38 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P38", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station == "P38", c("NMDS1", "NMDS2")]), ]
P4 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P4", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station == "P4", c("NMDS1", "NMDS2")]), ]
P402 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P402", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station == "P402", c("NMDS1", "NMDS2")]), ]
P8 <- Fishcytb.data.scores[Fishcytb.data.scores$station == "P8", ][chull(Fishcytb.data.scores[Fishcytb.data.scores$station == "P8", c("NMDS1", "NMDS2")]), ]
get hull data
hull.data <- rbind(P12,P22,P28,P38,P402,P4,P8) #combine grp.a and grp.b
hull.data
## sample Year Month station NMDS1 NMDS2
## 26 1907P12Ob 19 07 P12 1.02527080 0.35531582
## 8 1807P12Ob 18 07 P12 1.02527080 0.35531582
## 1 1804P12Ob 18 04 P12 -0.84854566 0.45664004
## 20 1904P12Ob 19 04 P12 -1.49236325 0.61629606
## 35 2007P12Ob 20 07 P12 0.49561271 0.60914544
## 27 1907P22Ob 19 07 P22 0.80952364 -0.35360545
## 36 2007P22Ob 20 07 P22 -0.45130300 -1.19501852
## 2 1804P22Ob 18 04 P22 -0.79137166 -0.45872049
## 43 2009P22Ob 20 09 P22 -0.54992194 0.78083456
## 14 1809P22Ob 18 09 P22 0.23304934 0.16725370
## 37 2007P28Ob 20 07 P28 0.60199652 -0.48057934
## 50 2010P28Ob 20 10 P28 -0.60428744 -1.05703984
## 3 1804P28Ob 18 04 P28 -0.73388498 0.08992029
## 44 2009P28Ob 20 09 P28 -0.09091469 1.00569721
## 9 1807P28Ob 18 07 P28 1.02527080 0.35531582
## 45 2009P38Ob 20 09 P38 0.50662348 -0.74041767
## 16 1809P38Ob 18 09 P38 0.20564989 -0.63980523
## 22 1904P38Ob 19 04 P38 -0.49053664 -0.37742784
## 4 1804P38Ob 18 04 P38 -0.95301882 0.39258863
## 33 1909P38Ob 19 09 P38 0.56019960 1.00316286
## 38 2007P38Ob 20 07 P38 0.68039527 0.07712894
## 10 1807P402Ob 18 07 P402 0.34463063 -0.19932061
## 46 2009P402Ob 20 09 P402 -0.01616893 -0.23459010
## 5 1804P402Ob 18 04 P402 -0.51998445 0.33170597
## 39 2007P402Ob 20 07 P402 0.49157829 0.27504512
## 29 1907P402Ob 19 07 P402 0.68039527 0.07712894
## 30 1907P4Ob 19 07 P4 0.05653786 0.25512507
## 6 1804P4Ob 18 04 P4 -0.83124451 -0.09560046
## 47 2009P4Ob 20 09 P4 -0.68783155 0.63334617
## 40 2007P4Ob 20 07 P4 0.04169239 0.49716243
## 48 2009P8Ob 20 09 P8 0.45606155 -1.31707329
## 7 1804P8Ob 18 04 P8 -0.33120097 -0.61879064
## 34 1909P8Ob 19 09 P8 -0.21672482 -0.18032775
## 25 1904P8Ob 19 04 P8 0.80952364 -0.35360545
vf <- envfit(Fishcytb_NMDSmodel, Fishcytb_wide, perm = 999)
spp.scrs <- as.data.frame(scores(vf, display = "vectors"))
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs))
####for ggplot
arrow_factor <- ordiArrowMul(vf)
spp.scrs <- as.data.frame(scores(vf, display = "vectors")) * arrow_factor
spp.scrs <- cbind(spp.scrs, Species = rownames(spp.scrs), Pvalues = vf$vectors$pvals, R_squared = vf$vectors$r)
# select significance similarly to `plot(vf, p.max = 0.01)`
spp.scrs <- subset(spp.scrs, Pvalues < 0.05)
spp.scrs <- subset(spp.scrs, R_squared > 0.3)
spp.scrs
## NMDS1 NMDS2 Species Pvalues R_squared
## Clupea pallasii -0.3951285 -0.7922745 Clupea pallasii 0.001 0.4200226
## Engraulis mordax 0.7500000 0.3024734 Engraulis mordax 0.001 0.3504487
## Parophrys vetulus -0.7742137 0.3349948 Parophrys vetulus 0.001 0.3813349
Fishcytb_Species_plot <- ggplot(Fishcytb.data.scores, aes(x = NMDS1, y = NMDS2)) +geom_point(aes(colour = station))+theme_bw()+theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"))+
geom_segment(data = spp.scrs,size=0.4,
aes(x = 0, xend = NMDS1, y = 0, yend = NMDS2),
arrow = arrow(length = unit(0.2, "cm")), colour = "darkgrey")+
geom_polygon(data=hull.data,aes(x=NMDS1,y=NMDS2,fill=station,group=station),alpha=0.20, size=0.1, linetype=1, colour="black") +
annotate("text", label = "2D Stress: 0.147", x = 0.55, y = 0.67, size = 5, colour = "black")+
geom_label_repel(data = spp.scrs, aes(x = NMDS1, y = NMDS2, label = Species),
size = 3, fontface="bold", fill="white", label.padding = unit(0.15, "lines"), box.padding = unit(0.16, "lines"), label.size = 0.05)
Fishcytb_Species_plot
save plot
setwd("/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-3/output")
ggsave(filename = "Fishcytb_NMDS_oblique.png", plot = Fishcytb_Species_plot, height = 170, width = 200, units="mm", device='png', dpi=300)