This markdown file details analyses done for hypothesis 1 of chapter 2 of my dissertation: Gelatinous zooplankton will be more abundant than crustaceans during marine heatwave years.
load packages
library(ggrepel)
library("scales")
library(dplyr)
library(tidyr)
library(data.table)
Read zooplankton data into R
# Import the zooplankton species-density table from its CSV export.
Zooplankton <- read.csv(
  file = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/data/WOAC_Species_Densities_2014-2022.csv"
)
Read environmental data into R
# Import the water-chemistry (niskin bottle) table from its CSV export.
Environmental <- read.csv(
  file = "/Users/hailaschultz/Dropbox/Schultz_Dissertation/Data_Analysis/Schultz_dissertation-2/data/WOAC_Chem_data_2014-2022_all-niskins.csv"
)
recode months to match with zooplankton data
unique(Environmental$Month)
## [1] "Jul" "Sep" "Oct" "Apr" "May" "Nov" "Aug" "Jun"
# Lookup from the month labels found in the chemistry data to the upper-case
# labels used for the zooplankton samples. "Oct" is folded into "SEP" and
# "Jun" into "JUL"; the "OCT" entry does not occur in the labels listed above,
# so it has no effect on them.
env_month_lookup <- c(
  Jul = "JUL",
  Sep = "SEP",
  Oct = "SEP",
  Apr = "APR",
  May = "MAY",
  Nov = "NOV",
  Aug = "AUG",
  Jun = "JUL",
  OCT = "SEP"
)
Environmental$Month <- recode(Environmental$Month, !!!env_month_lookup)
# List every sampling date in the chemistry data; the dates that are not
# needed are removed further below.
unique(Environmental$Date)
## [1] "7/16/14" "7/15/14" "7/14/14" "7/18/14" "9/30/14" "9/29/14"
## [7] "10/30/14" "10/1/14" "10/31/14" "10/23/14" "10/29/14" "10/3/14"
## [13] "4/7/15" "4/5/15" "4/10/15" "4/6/15" "5/24/15" "7/9/15"
## [19] "7/8/15" "7/7/15" "7/11/15" "9/25/15" "9/24/15" "9/23/15"
## [25] "9/27/15" "11/18/15" "11/16/15" "11/17/15" "4/7/16" "4/6/16"
## [31] "4/5/16" "4/9/16" "7/23/16" "7/22/16" "7/7/16" "7/25/16"
## [37] "9/23/16" "9/22/16" "9/21/16" "9/25/16" "10/27/16" "4/7/17"
## [43] "4/8/17" "4/6/17" "4/10/17" "5/3/17" "5/4/17" "7/13/17"
## [49] "7/12/17" "7/11/17" "7/15/17" "9/13/17" "9/12/17" "9/11/17"
## [55] "9/18/17" "9/15/17" "10/18/17" "4/9/18" "4/8/18" "4/7/18"
## [61] "4/11/18" "5/24/18" "7/25/18" "7/24/18" "7/27/18" "7/23/18"
## [67] "9/13/18" "9/12/18" "9/11/18" "9/15/18" "10/18/18" "10/19/18"
## [73] "4/24/19" "4/23/19" "4/22/19" "4/26/19" "5/23/19" "7/7/19"
## [79] "7/6/19" "7/5/19" "7/9/19" "9/13/19" "9/12/19" "9/11/19"
## [85] "9/15/19" "7/10/20" "7/4/20" "7/9/20" "7/8/20" "7/12/20"
## [91] "8/31/20" "9/14/20" "9/13/20" "9/30/20" "9/12/20" "9/16/20"
## [97] "4/19/21" "4/18/21" "4/17/21" "4/21/21" "7/14/21" "7/13/21"
## [103] "7/12/21" "7/16/21" "9/15/21" "9/14/21" "9/29/21" "9/13/21"
## [109] "9/17/21" "10/1/21" "4/27/22" "4/26/22" "4/25/22" "4/29/22"
## [115] "5/6/22" "6/29/22" "6/28/22" "6/27/22" "7/1/22" "10/14/22"
## [121] "10/15/22"
# List every sampling date in the zooplankton data. Note the mixed formats in
# the output below (e.g. "07/12/2021" next to "4/21/21").
unique(Zooplankton$Date)
## [1] "9/5/14" "10/30/14" "4/10/15" "10/29/14" "10/31/14"
## [6] "4/5/15" "4/6/15" "1/20/15" "2/15/15" "3/6/15"
## [11] "7/31/14" "10/21/14" "10/14/14" "11/10/14" "11/5/14"
## [16] "9/30/14" "10/7/14" "4/7/15" "9/29/14" "10/1/14"
## [21] "7/14/14" "7/16/14" "7/3/14" "10/3/14" "7/18/14"
## [26] "7/15/14" "7/7/16" "7/22/16" "7/8/15" "7/23/16"
## [31] "7/25/16" "7/9/15" "7/4/15" "7/7/15" "7/11/15"
## [36] "9/23/15" "9/23/16" "9/24/15" "9/25/15" "9/27/15"
## [41] "9/25/16" "9/21/16" "9/22/16" "4/5/16" "4/6/16"
## [46] "4/7/16" "4/8/16" "11/16/15" "3/17/16" "3/18/16"
## [51] "11/18/15" "11/19/15" "3/19/16" "4/6/17" "4/7/17"
## [56] "4/8/17" "4/10/17" "4/9/18" "4/7/18" "7/11/17"
## [61] "7/12/17" "7/13/17" "7/15/17" "9/11/17" "4/8/18"
## [66] "9/12/17" "9/13/17" "4/11/18" "9/15/17" "7/25/18"
## [71] "7/24/18" "7/27/18" "7/23/18" "9/13/18" "9/15/18"
## [76] "9/12/18" "9/11/18" "4/22/19" "4/23/19" "4/24/19"
## [81] "4/26/19" "7/6/19" "7/5/19" "7/7/19" "7/9/19"
## [86] "9/11/19" "9/12/19" "9/13/19" "9/15/19" "7/8/20"
## [91] "9/12/20" "9/14/20" "7/10/20" "9/16/20" "9/13/20"
## [96] "9/28/20" "9/30/20" "9/29/20" "7/12/20" "7/9/20"
## [101] "4/16/21" "4/18/21" "4/19/21" "9/13/21" "9/14/21"
## [106] "9/15/21" "9/17/21" "4/25/22" "4/26/22" "4/27/22"
## [111] "9/14/22" "9/16/22" "9/13/22" "4/29/22" "9/12/22"
## [116] "4/17/21" "07/12/2021" "07/13/2021" "07/14/2021" "07/16/2021"
## [121] "4/21/21" "06/27/2022" "06/28/2022" "06/29/2022" "6/29/22"
## [126] "7/1/22"
# Sampling dates in the chemistry data that are excluded from the analysis.
env_dates_to_drop <- c(
  "10/30/14", "10/31/14", "10/23/14", "10/29/14", "5/24/15",
  "11/18/15", "11/16/15", "11/17/15", "10/27/16", "5/3/17",
  "5/4/17", "10/18/17", "5/24/18", "10/18/18", "10/19/18",
  "5/23/19", "8/31/20", "10/1/21", "5/6/22", "10/14/22",
  "10/15/22", "9/29/21", "7/4/20", "9/30/20", "9/18/17"
)
# A single set-membership filter replaces 25 sequential subset() passes.
# The explicit is.na() guard keeps the previous behaviour: `Date != "..."`
# inside subset() also dropped rows whose Date is missing, whereas `%in%`
# alone would keep them.
Environmental <- subset(
  Environmental,
  !is.na(Date) & !(Date %in% env_dates_to_drop)
)
Temp: summarize by depth category
# Mean CTD temperature for each station / date / depth category.
# `.groups = "drop"` returns an ungrouped tibble instead of one that is still
# grouped by Station, Date, Depth and Year (the summarise() default), so no
# grouping leaks into the steps below.
# NOTE(review): missing temperatures are not removed here (the pH and O2 rows
# are passed through drop_na() first), so a group containing an NA gets an NA
# mean -- confirm this is intended.
temp <- Environmental %>%
  group_by(Station, Date, Depth, Year, Month) %>%
  summarise(temp = mean(CTDTMP_DEG_C_ITS90), .groups = "drop")
# Keep only the rows of the "Surface" depth category.
temp_surf <- subset(temp, Depth == "Surface")
# Station-Year-Month key used to join the environmental and zooplankton tables.
temp_surf$code <- paste(temp_surf$Station, temp_surf$Year, temp_surf$Month, sep = "-")
pH: remove missing rows and remove weird values
# Keep only the rows that have a pH measurement.
pH <- Environmental %>% drop_na(pH)
# Drop the weird reading of 2130.2. near() compares with a small tolerance,
# so the row is removed even if the stored double is not bit-identical to the
# literal (exact `!=` on floating-point values is fragile).
pH <- subset(pH, !near(pH, 2130.2))
summarize by depth category
# Mean pH for each station / date / depth category.
# `.groups = "drop"` returns an ungrouped tibble instead of one that is still
# grouped by Station, Date, Depth and Year (the summarise() default).
pH <- pH %>%
  group_by(Station, Date, Depth, Year, Month) %>%
  summarise(pH = mean(pH), .groups = "drop")
# Keep only the rows of the "Surface" depth category.
pH_surf <- subset(pH, Depth == "Surface")
# Station-Year-Month key used to join the environmental and zooplankton tables.
pH_surf$code <- paste(pH_surf$Station, pH_surf$Year, pH_surf$Month, sep = "-")
O2: remove missing rows
# Keep only the rows that have a dissolved-oxygen value (O2.in.mg.l).
Oxygen <- drop_na(Environmental, O2.in.mg.l)
summarize by depth category
# Mean O2.in.mg.l for each station / date / depth category.
# `.groups = "drop"` returns an ungrouped tibble instead of one that is still
# grouped by Station, Date, Depth and Year (the summarise() default).
Oxygen <- Oxygen %>%
  group_by(Station, Date, Depth, Year, Month) %>%
  summarise(Oxygen = mean(O2.in.mg.l), .groups = "drop")
# Keep only the rows of the "Surface" depth category.
Oxygen_surf <- subset(Oxygen, Depth == "Surface")
# Station-Year-Month key used to join the environmental and zooplankton tables.
Oxygen_surf$code <- paste(Oxygen_surf$Station, Oxygen_surf$Year, Oxygen_surf$Month, sep = "-")
Merge environmental tables
# Full outer joins of the three surface tables on the Station-Year-Month key.
joined <- merge(temp_surf, pH_surf, by = "code", all = TRUE)
joined <- merge(joined, Oxygen_surf, by = "code", all = TRUE)
# Keep the key, the descriptors carried over from temp_surf (suffix ".x") and
# the three measurements. Selecting by name keeps exactly the columns that the
# positional drop -c(8:12, 14:18) kept, in the same order, but fails loudly
# instead of silently picking the wrong columns if an upstream table changes.
env_keep_cols <- c(
  "code", "Station.x", "Date.x", "Depth.x", "Year.x", "Month.x",
  "temp", "pH", "Oxygen"
)
env_joined <- joined[env_keep_cols]
subset to stations sampled during WOAC cruises
unique(Zooplankton$Station)
## [1] "PEFS1d" "P8" "PEFN1d" "P12" "P38" "P4" "P28" "P22"
## [9] "PEFN2" "PEFS2" "P402" "p4" "P381" "P105" "P7" "P136"
## [17] "P132" "P123"
# Stations excluded from the analysis.
stations_to_drop <- c(
  "PEFS1d", "PEFN1d", "PEFN2", "PEFS2", "P381",
  "P105", "P136", "P7", "P123", "P132"
)
# A single set-membership filter replaces ten sequential subset() passes.
# The is.na() guard keeps the previous behaviour of also dropping rows whose
# Station is missing.
Zoop_sub <- subset(
  Zooplankton,
  !is.na(Station) & !(Station %in% stations_to_drop)
)
unique(Zoop_sub$Date)
## [1] "10/30/14" "4/10/15" "10/29/14" "10/31/14" "4/5/15"
## [6] "4/6/15" "4/7/15" "9/30/14" "9/29/14" "10/1/14"
## [11] "7/14/14" "7/16/14" "7/3/14" "10/3/14" "7/18/14"
## [16] "7/15/14" "7/7/16" "7/22/16" "7/8/15" "7/23/16"
## [21] "7/25/16" "7/9/15" "7/4/15" "7/7/15" "7/11/15"
## [26] "9/23/15" "9/23/16" "9/24/15" "9/25/15" "9/27/15"
## [31] "9/25/16" "9/21/16" "9/22/16" "4/5/16" "4/6/16"
## [36] "4/7/16" "4/8/16" "11/16/15" "3/17/16" "3/18/16"
## [41] "11/18/15" "4/6/17" "4/7/17" "4/8/17" "4/10/17"
## [46] "4/9/18" "4/7/18" "7/11/17" "7/12/17" "7/13/17"
## [51] "7/15/17" "9/11/17" "4/8/18" "9/12/17" "9/13/17"
## [56] "4/11/18" "9/15/17" "7/25/18" "7/24/18" "7/27/18"
## [61] "7/23/18" "9/13/18" "9/15/18" "9/12/18" "9/11/18"
## [66] "4/22/19" "4/23/19" "4/24/19" "4/26/19" "7/6/19"
## [71] "7/5/19" "7/7/19" "7/9/19" "9/11/19" "9/12/19"
## [76] "9/13/19" "9/15/19" "7/8/20" "9/12/20" "9/14/20"
## [81] "7/10/20" "9/16/20" "9/13/20" "9/30/20" "7/12/20"
## [86] "7/9/20" "4/16/21" "4/18/21" "4/19/21" "9/13/21"
## [91] "9/14/21" "9/15/21" "9/17/21" "4/25/22" "4/26/22"
## [96] "4/27/22" "9/14/22" "9/16/22" "9/13/22" "4/29/22"
## [101] "9/12/22" "4/17/21" "07/12/2021" "07/13/2021" "07/14/2021"
## [106] "07/16/2021" "4/21/21" "06/27/2022" "06/28/2022" "06/29/2022"
## [111] "6/29/22" "7/1/22"
# Remove the sampling dates that are not needed. A single set-membership
# filter replaces eleven sequential subset() passes; the is.na() guard keeps
# the previous behaviour of also dropping rows whose Date is missing.
zoop_dates_to_drop <- c(
  "10/30/14", "10/31/14", "10/29/14", "9/14/22", "9/16/22", "9/13/22",
  "9/12/22", "11/16/15", "3/17/16", "3/18/16", "11/18/15"
)
Zoop_sub <- subset(
  Zoop_sub,
  !is.na(Date) & !(Date %in% zoop_dates_to_drop)
)
# Normalise the one lower-case station label ("p4") to "P4".
station_fix <- c(p4 = "P4")
Zoop_sub$Station <- recode(Zoop_sub$Station, !!!station_fix)
# Bring the month labels into the same upper-case scheme as the chemistry data.
unique(Zoop_sub$Month)
## [1] "APR" "SEP" "JUL" "OCT" "Jul"
zoop_month_lookup <- c(
  Jul = "JUL",
  Sep = "SEP",
  OCT = "SEP"
)
Zoop_sub$Month <- recode(Zoop_sub$Month, !!!zoop_month_lookup)
# Discard the ethanol samples, i.e. rows whose Code contains "_EtOH".
Zoop_sub <- subset(Zoop_sub, !grepl("_EtOH", Code))
# Discard the oblique tows (rows with a missing Tow.Type are dropped as well).
Zoop_sub <- Zoop_sub[which(Zoop_sub$Tow.Type != "Oblique"), ]
add up multiple lines per station, categorize by crustaceans and gelatinous
# Total density per sample (Code) and crustacean/gelatinous category.
# `.groups = "drop"` returns an ungrouped tibble instead of one that is still
# grouped by Code, Station, Basin, Year and Month (the summarise() default),
# so no grouping leaks into the reshaping step.
Zoop_sub <- Zoop_sub %>%
  group_by(Code, Station, Basin, Year, Month, Crustaceans.vs.Gelatinous) %>%
  summarise(zoop_density = sum(Density....m3.), .groups = "drop")
change from long to wide format
colnames(Zoop_sub)
## [1] "Code" "Station"
## [3] "Basin" "Year"
## [5] "Month" "Crustaceans.vs.Gelatinous"
## [7] "zoop_density"
# One row per sample, one density column per category.
# dcast() comes from data.table and is written for data.tables; handing it a
# tibble relies on a fallback to reshape2 whose behaviour depends on the
# installed data.table version. Converting explicitly uses data.table's own
# method, and as.data.frame() hands a plain data.frame to the code below.
Zoop_sub_wide <- as.data.frame(
  dcast(
    as.data.table(as.data.frame(Zoop_sub)),
    Code + Station + Basin + Year + Month ~ Crustaceans.vs.Gelatinous,
    value.var = "zoop_density"
  )
)
# Make sure the descriptor columns are factors.
Zoop_sub_wide$Year <- as.factor(Zoop_sub_wide$Year)
Zoop_sub_wide$Station <- as.factor(Zoop_sub_wide$Station)
Zoop_sub_wide$Month <- as.factor(Zoop_sub_wide$Month)
Zoop_sub_wide$Basin <- as.factor(Zoop_sub_wide$Basin)
# Station-Year-Month key used to join with the environmental table.
Zoop_sub_wide$code <- paste(Zoop_sub_wide$Station, Zoop_sub_wide$Year, Zoop_sub_wide$Month, sep = "-")
unique(Zoop_sub$Code)
## [1] "040515P280945" "040515P41740" "040516P281100" "040516P41823"
## [5] "040615P221412" "040615P81946" "040616P221010" "040616P81650"
## [9] "040617P281133" "040617P81737" "040715P121500" "040715P4021825"
## [13] "040716P4021712" "040716p121514" "040717P121240" "040717P4021420"
## [17] "040718P281239" "040718P4021710" "040718P42020" "040816P381307"
## [21] "040817P221405" "040817P41905" "040818P221146" "040918P121454"
## [25] "040918P80729" "041015P381310" "041017P381113" "041118P381055"
## [29] "041621P281140" "041721P41811" "041821P221015" "041821P81709"
## [33] "041921P121531" "041921P4021744" "042121P381216" "042219P280849"
## [37] "042219P41455" "042319P220840" "042319P81509" "042419P121215"
## [41] "042419P4021410" "042522P280857" "042522P41615" "042619P381005"
## [45] "042622P220940" "042622P81438" "042722P121435" "042722P4021622"
## [49] "042922P381106" "062722P280909" "062722P41604" "062822P221046"
## [53] "062822P81630" "062922P121455" "062922P4021638" "070122P381115"
## [57] "070314P281935" "070415p281040" "070519P281210" "070519P41858"
## [61] "070619P220850" "070619P81535" "070715p41822" "070716p280801"
## [65] "070716p41601" "070719P121300" "070719P4021445" "070815p221219"
## [69] "070815p81920" "070820P280852" "070820P41611" "070915p121630"
## [73] "070915p4021904" "070919P381145" "070920P220930" "070920P81620"
## [77] "071020P121345" "071020P4021613" "071115p381308" "071117P281010"
## [81] "071117P41455" "071217P220909" "071217P81200" "071220P381150"
## [85] "071221P281002" "071221P41701" "071317P121420" "071317P4021605"
## [89] "071321P221002" "071321P81614" "071414P281105" "071414P41919"
## [93] "071421P121503" "071421P4021659" "071514P221430" "071514P82034"
## [97] "071517P381015" "071614P121645" "071614P4021943" "071621P381018"
## [101] "071814P381317" "072216p220917" "072216p81614" "072316p121223"
## [105] "072316p4021420" "072318P281050" "072318P41826" "072418P81536"
## [109] "072418p221029" "072516p381008" "072518P121346" "072518P4021551"
## [113] "072718P381006" "091117P280840" "091117P41555" "091118P281144"
## [117] "091118P41817" "091119P280949" "091119P41620" "091217P221120"
## [121] "091217P81717" "091218P220849" "091218P81536" "091219P220754"
## [125] "091219P81320" "091220P280944" "091220P41656" "091317P121312"
## [129] "091317P4021504" "091318P121400" "091318P4021626" "091319P121350"
## [133] "091319P4021552" "091320P220906" "091320P81415" "091321P280908"
## [137] "091321P41630" "091420P121457" "091420P4021658" "091421P220955"
## [141] "091421P81514" "091517P381125" "091518P381130" "091519P381009"
## [145] "091521P121433" "091521P4021640" "091620P380904" "091721P381230"
## [149] "092116p281030" "092116p41800" "092216P221118" "092216P81738"
## [153] "092315p281040" "092315p41739" "092316P121339" "092316p4021550"
## [157] "092415p220956" "092415p81520" "092515p121502" "092515p4021731"
## [161] "092516p381020" "092715p381330" "092914p281147" "092914p41808"
## [165] "093014P221153" "093014P81950" "093020P221209" "093020P281838"
## [169] "100114P121502" "100114p4021755" "100314p381403"
# Full outer join of the environmental and zooplankton tables on the
# Station-Year-Month key.
Zoop_env <- merge(env_joined, Zoop_sub_wide, by = "code", all = TRUE)
# Drop the zooplankton copies of Station / Year / Month (env_joined already
# supplies Station.x / Year.x / Month.x). Dropping by name removes the same
# columns as the positional -c(11, 13:14) without depending on column order.
zoop_dup_cols <- c("Station", "Year", "Month")
Zoop_env <- Zoop_env[setdiff(names(Zoop_env), zoop_dup_cols)]
# Drop rows with no Year.x, i.e. codes that found no match in env_joined.
Zoop_env <- Zoop_env[!is.na(Zoop_env$Year.x), ]
remove non-data columns, convert to proportions
# Every column that is not a key, descriptor or environmental measurement is
# treated as a category density column. Selecting by name picks the same
# columns as the positional 12:ncol(Zoop_env); drop = FALSE keeps a data frame
# even if only one category column is present, so rowSums() still works.
non_density_cols <- c(
  "code", "Station.x", "Date.x", "Depth.x", "Year.x", "Month.x",
  "temp", "pH", "Oxygen", "Code", "Basin"
)
RE2 <- Zoop_env[, setdiff(names(Zoop_env), non_density_cols), drop = FALSE]
# Replace missing densities with 0.
RE2[is.na(RE2)] <- 0
# Convert to proportions of the row total. Rows whose densities are all zero
# give 0 / 0 = NaN.
RE2 <- RE2 / rowSums(RE2)
add columns back in
# Re-attach the key, descriptor and environmental columns to the proportions.
RE2$code <- Zoop_env$code
RE2$Station.x <- Zoop_env$Station.x
RE2$Year.x <- Zoop_env$Year.x
RE2$Month.x <- Zoop_env$Month.x
RE2$pH <- Zoop_env$pH
RE2$temp <- Zoop_env$temp
RE2$Oxygen <- Zoop_env$Oxygen
Zoop_env_prop <- RE2
# Long format: one row per sample and category.
# melt() comes from data.table and is written for data.tables; handing it a
# data.frame relies on a fallback to reshape2 whose behaviour depends on the
# installed data.table version. Converting explicitly uses data.table's own
# method, and as.data.frame() restores a plain data.frame.
prop_id_cols <- c("code", "Station.x", "Year.x", "Month.x", "temp", "pH", "Oxygen")
Zoop_env_long <- as.data.frame(
  melt(as.data.table(Zoop_env_prop), id.vars = prop_id_cols, na.rm = FALSE)
)
# Pin the column order (ids, variable, value) that later steps rely on.
Zoop_env_long <- Zoop_env_long[c(prop_id_cols, "variable", "value")]
# Treat zero proportions as missing, then keep complete rows only.
# NOTE(review): complete.cases() also drops rows that lack temp, pH or Oxygen,
# not just rows with a missing value -- confirm this is intended.
Zoop_env_long$value[Zoop_env_long$value == 0] <- NA
Zoop_env_long <- Zoop_env_long[complete.cases(Zoop_env_long), ]
# Rows of the long table that belong to the "Gelatinous" category.
Gelatinous <- subset(Zoop_env_long, variable == "Gelatinous")
Gelatinous$Year.x <- as.factor(Gelatinous$Year.x)
# Fix the station order used for the colour legend.
gel_station_levels <- c("P22", "P4", "P8", "P28", "P12", "P402", "P38")
Gelatinous$Station.x <- factor(Gelatinous$Station.x, levels = gel_station_levels)
# One colour per station, in the order of the levels above.
gel_station_colours <- c(
  "#F8766D", "#C49A00", "#53B400", "#00C094", "#00B6EB", "#A69AFF", "#FB61D7"
)
# Scatter plot of gelatinous proportion against temperature.
Gelatinous_plot <- ggplot(Gelatinous, aes(x = temp, y = value)) +
  geom_point(aes(colour = Station.x)) +
  xlab("Temperature") +
  ylab("Gelatinous Zooplankton Relative Abundance") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_colour_manual(values = gel_station_colours)
Gelatinous_plot
matrix of plots - surface values
# Scatter-plot matrix of temp, pH, Oxygen and the gelatinous value
# (columns 5-7 and 9 of the long table, selected by name).
pairs(Gelatinous[, c("temp", "pH", "Oxygen", "value")], pch = 19)
# Rows of the long table that belong to the "Crustaceans" category.
Crustacean <- subset(Zoop_env_long, variable == "Crustaceans")
Crustacean$Year.x <- as.factor(Crustacean$Year.x)
# Fix the station order used for the colour legend.
crust_station_levels <- c("P22", "P4", "P8", "P28", "P12", "P402", "P38")
Crustacean$Station.x <- factor(Crustacean$Station.x, levels = crust_station_levels)
# One colour per station, in the order of the levels above.
crust_station_colours <- c(
  "#F8766D", "#C49A00", "#53B400", "#00C094", "#00B6EB", "#A69AFF", "#FB61D7"
)
# Scatter plot of crustacean proportion against temperature.
Crustacean_plot <- ggplot(Crustacean, aes(x = temp, y = value)) +
  geom_point(aes(colour = Station.x)) +
  xlab("Temperature") +
  ylab("Crustacean Zooplankton Relative Abundance") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_colour_manual(values = crust_station_colours)
Crustacean_plot
matrix of plots - surface values
# Scatter-plot matrix of temp, pH, Oxygen and the crustacean value
# (columns 5-7 and 9 of the long table, selected by name).
pairs(Crustacean[, c("temp", "pH", "Oxygen", "value")], pch = 19)
#subset columns
Zoop_env<-select(Zoop_env, Gelatinous,Crustaceans, code, Station.x, Year.x, Month.x, temp, pH, Oxygen)
Zoop_env_long<-melt(Zoop_env, na.rm = FALSE, c("code", "Station.x","Year.x","Month.x","temp","pH","Oxygen"))
#remove NA rows
Zoop_env_long$value[Zoop_env_long$value == 0] <- NA
Zoop_env_long<-Zoop_env_long[complete.cases(Zoop_env_long), ]
# Rows of the long table that belong to the "Gelatinous" category.
Gelatinous <- subset(Zoop_env_long, variable == "Gelatinous")
Gelatinous$Year.x <- as.factor(Gelatinous$Year.x)
# Fix the station order used for the colour legend.
gel_station_levels <- c("P22", "P4", "P8", "P28", "P12", "P402", "P38")
Gelatinous$Station.x <- factor(Gelatinous$Station.x, levels = gel_station_levels)
# One colour per station, in the order of the levels above.
gel_station_colours <- c(
  "#F8766D", "#C49A00", "#53B400", "#00C094", "#00B6EB", "#A69AFF", "#FB61D7"
)
# Scatter plot of the gelatinous value against temperature.
Gelatinous_plot <- ggplot(Gelatinous, aes(x = temp, y = value)) +
  geom_point(aes(colour = Station.x)) +
  xlab("Temperature") +
  ylab("Gelatinous Zooplankton Abundance") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_colour_manual(values = gel_station_colours)
Gelatinous_plot
matrix of plots - surface values
# Scatter-plot matrix of temp, pH, Oxygen and the gelatinous value
# (columns 5-7 and 9 of the long table, selected by name).
pairs(Gelatinous[, c("temp", "pH", "Oxygen", "value")], pch = 19)
# Rows of the long table that belong to the "Crustaceans" category.
Crustacean <- subset(Zoop_env_long, variable == "Crustaceans")
Crustacean$Year.x <- as.factor(Crustacean$Year.x)
# Fix the station order used for the colour legend.
crust_station_levels <- c("P22", "P4", "P8", "P28", "P12", "P402", "P38")
Crustacean$Station.x <- factor(Crustacean$Station.x, levels = crust_station_levels)
# One colour per station, in the order of the levels above.
crust_station_colours <- c(
  "#F8766D", "#C49A00", "#53B400", "#00C094", "#00B6EB", "#A69AFF", "#FB61D7"
)
# Scatter plot of the crustacean value against temperature.
Crustacean_plot <- ggplot(Crustacean, aes(x = temp, y = value)) +
  geom_point(aes(colour = Station.x)) +
  xlab("Temperature") +
  ylab("Crustacean Zooplankton Abundance") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  ) +
  scale_colour_manual(values = crust_station_colours)
Crustacean_plot
matrix of plots - surface values
# Scatter-plot matrix of temp, pH, Oxygen and the crustacean value
# (columns 5-7 and 9 of the long table, selected by name).
pairs(Crustacean[, c("temp", "pH", "Oxygen", "value")], pch = 19)