This code fits generalized linear models (GLMs) to field counts of Ditrichocorycaeus anglicus and medium calnaoids to determine the predictors affecting their probability of abundance. Coefficients were calculated and reported according to recommendations from this paper: https://psycnet.apa.org/record/2021-40400-001
The report is located here: https://rpubs.com/HailaSchultz/field_GLM
load packages
library(MASS)
library(MuMIn)
library(car)
library(ggeffects)
library(dplyr)
library(forcats)
library(stringr)
library(tidyverse)
library(ggpubr)
Read database into R. If current database changes, just change the path
Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")
Subset to field stations and Quartermaster and Sinclair Inlet. Budd and Eld Inlet are excluded because they didn’t have jellyfish populations. subset data
#select only field data
Field_data<-subset(Database,Trial.Type=="Field")
#subset to QM and SC
Field_SCQM<-subset(Field_data, Site %in% c("QM","SC"))
recode copepod names
Field_SCQM$Species_lifestage <- paste(Field_SCQM$Genus.species, Field_SCQM$Life.History.Stage, sep="_")
Field_SCQM <- Field_SCQM %>%
mutate(Species_lifestage_combined = fct_recode(Species_lifestage,
"CALANOIDA_Medium" = "ACARTIA_Copepodite",
"CALANOIDA_Medium" = "ACARTIA_Female, Adult",
"CALANUS PACIFICUS" = "CALANUS PACIFICUS_C5-adult",
"CALANUS PACIFICUS" = "CALANUS PACIFICUS_C5-Adult",
"CALANUS PACIFICUS" = "CALANUS PACIFICUS_Copepodite",
"CALANUS PACIFICUS" = "CALANUS PACIFICUS_Female, Adult",
"CALANUS PACIFICUS" = "CALANUS PACIFICUS_Male, Adult",
"DITRICHOCORYCAEUS ANGLICUS" = "DITRICHOCORYCAEUS ANGLICUS_Large",
"DITRICHOCORYCAEUS ANGLICUS" = "DITRICHOCORYCAEUS ANGLICUS_Small"))
unique(Field_SCQM$Species_lifestage_combined)
## [1] CALANOIDA_Medium AETIDEUS_Copepodite
## [3] AETIDEUS_Female, Adult AURELIA LABIATA_Ephyra
## [5] BARNACLES_Cyprid larva BARNACLES_Nauplius
## [7] BIVALVIA_Veliger BRACHYURA_Unknown
## [9] BRYOZOA_Cyphonaut CALANOIDA_Copepodite
## [11] CALANOIDA_Large CALANOIDA_Small
## [13] CALANUS PACIFICUS CENTROPAGES_Female, Adult
## [15] Chaet/Euphaus Egg_Egg CHAETOGNATHA_Unknown
## [17] CLADOCERA_Unknown CLYTIA GREGARIA_Medusa
## [19] COPEPODA_Egg COPEPODA_Nauplius
## [21] CRABS_Megalopa CRABS_Zoea
## [23] Diatom-Centric_Unknown DITRICHOCORYCAEUS ANGLICUS
## [25] ECHINODERMATA_Pluteus larva EUPHAUSIIDAE_Calyptopis
## [27] EUPHAUSIIDAE_Furcilia EUPHAUSIIDAE_Nauplius
## [29] EUTONINA INDICANS_Medusa FISH_Egg
## [31] FISH_Larva GAETANUS_Adult
## [33] GAMMARIDEA_Unknown GASTROPODA_Veliger
## [35] HARPACTICOIDA_Unknown HYPERIIDEA_Unknown
## [37] Insecta_Unknown ISOPODA_Unknown
## [39] JELLYFISHES_Ephyra JELLYFISHES_Medusa
## [41] JELLYFISHES_Planula larvae LARVACEA_Unknown
## [43] LITTORINA_Egg METRIDIA_Female, Adult
## [45] MOLLUSCA_Trochophore larva Nemertea_Pilidium
## [47] NEOTRYPAEA CALIFORNIENSIS_Unknown NOCTILUCA_Unknown
## [49] OITHONA_Unknown POLYCHAETA_Unknown
## [51] SHRIMP_Unknown SIPHONOPHORA_Bract
## [53] SIPHONOPHORA_Gonophore SIPHONOPHORA_Unknown
## [55] UNKNOWN_Larva
## 55 Levels: CALANOIDA_Medium AETIDEUS_Copepodite ... UNKNOWN_Larva
Deal with duplicate station names
# combine station and date for unique stations
Field_SCQM$Station_unique <- paste(Field_SCQM$Station, Field_SCQM$Sample.Date)
unique(Field_SCQM$Station_unique)
## [1] "QM1 08/28/2019" "SC4 09/25/2019" "SC2 09/25/2019" "SC3 09/25/2019"
## [5] "SC1 09/25/2019" "QM10 08/24/2021" "QM5b 07/29/2021" "QM6b 07/29/2021"
## [9] "QM2c 08/22/2021" "SC11 08/26/2021" "SC5c 08/25/2021" "SC6c 08/25/2021"
## [13] "SC1a 07/27/2021" "SC4a 07/27/2021" "SC16 08/26/2021" "SC10 08/26/2021"
## [17] "SC7 08/25/2021" "QM1b 07/29/2021" "SC2c 08/24/2021" "QM1c 08/22/2021"
## [21] "SC14 08/26/2021" "SC3a 07/27/2021" "QM9 08/23/2021" "SC5d 08/25/2021"
## [25] "QM2b 07/28/2021" "SC3c 08/24/2021" "QM6c 08/22/2021" "QM5c 08/22/2021"
## [29] "SC17 08/27/2021" "QM3c 08/22/2021" "SC5 09/25/2019" "QM6 08/28/2019"
## [33] "QM5 08/28/2019" "QM2 08/28/2019" "SC6 09/25/2019" "QM3 08/28/2019"
## [37] "QM4 08/28/2019" "QM3a 08/29/2020" "QM1a 08/28/2020" "SC2a 07/27/2021"
## [41] "QM5a 08/29/2020" "QM4a 08/29/2020" "SC8 08/25/2021" "SC12 08/26/2021"
## [45] "SC4c 08/25/2021" "SC6a 07/27/2021" "QM11 08/24/2021" "QM4b 07/29/2021"
## [49] "QM12 08/24/2021" "SC5a 07/27/2021" "QM3b 07/29/2021" "QM2 07/24/2019"
## [53] "QM6 07/24/2019" "QM1 07/24/2019" "QM2a 08/28/2020" "QM8a 08/29/2020"
## [57] "QM3 07/24/2019" "QM5 07/24/2019" "QM4 07/24/2019" "QM7a 08/29/2020"
## [61] "QM4c 08/22/2021"
Field_SCQM$Station_unique <-recode(Field_SCQM$Station_unique,
'QM1 07/24/2019'='QM1s 07/24/2019',
'QM2 07/24/2019'='QM2s 07/24/2019',
'QM3 07/24/2019'='QM3s 07/24/2019',
'QM4 07/24/2019'='QM4s 07/24/2019',
'QM5 07/24/2019'='QM5s 07/24/2019',
'QM6 07/24/2019'='QM6s 07/24/2019',
)
#remove date
Field_SCQM$Station_unique<-str_sub(Field_SCQM$Station_unique, end=-12)
unique(Field_SCQM$Station_unique)
## [1] "QM1" "SC4" "SC2" "SC3" "SC1" "QM10" "QM5b" "QM6b" "QM2c" "SC11"
## [11] "SC5c" "SC6c" "SC1a" "SC4a" "SC16" "SC10" "SC7" "QM1b" "SC2c" "QM1c"
## [21] "SC14" "SC3a" "QM9" "SC5d" "QM2b" "SC3c" "QM6c" "QM5c" "SC17" "QM3c"
## [31] "SC5" "QM6" "QM5" "QM2" "SC6" "QM3" "QM4" "QM3a" "QM1a" "SC2a"
## [41] "QM5a" "QM4a" "SC8" "SC12" "SC4c" "SC6a" "QM11" "QM4b" "QM12" "SC5a"
## [51] "QM3b" "QM2s" "QM6s" "QM1s" "QM2a" "QM8a" "QM3s" "QM5s" "QM4s" "QM7a"
## [61] "QM4c"
Deal with Jelly density
#convert sample year to a factor
Field_SCQM$Sample.Year<- as.factor(Field_SCQM$Sample.Year)
#remove commas from jelly density
Field_SCQM$Jelly.Density <- as.numeric(gsub(",","",Field_SCQM$Jelly.Density..g.m3.))
#add 1 to jelly density for analyses that prohibit zeroes
Field_SCQM$Jelly.Density <-Field_SCQM$Jelly.Density+1
colnames(Field_SCQM)
## [1] "BugSampleID" "Project"
## [3] "Sample.Code" "Sampling.Group"
## [5] "Station" "Site"
## [7] "Site.Name" "Basin"
## [9] "Sub.Basin" "Latitude"
## [11] "Longitude" "Sample.Date"
## [13] "Sample.Year" "Sample.Month"
## [15] "Sample.Time" "Tow.Type"
## [17] "Mesh.Size" "Station.Depth..m."
## [19] "Flow.meter..revs." "Broad.Group"
## [21] "Mid.Level.Group" "X1st.Word.Taxa"
## [23] "Genus.species" "Life.History.Stage"
## [25] "Total.Ct" "Density....m3."
## [27] "Vol.Filtered..m3." "Jelly.Mass..g."
## [29] "Number.of.Jellies" "Trial.Time"
## [31] "Trial.Type" "Jelly.Size"
## [33] "Jelly.Density....m3." "Jelly.Density..g.m3."
## [35] "Location" "Species_lifestage"
## [37] "Species_lifestage_combined" "Station_unique"
## [39] "Jelly.Density"
some of this code is similar to 13_filed_CTD-table
import CTD downcasts into one file
#set working directory to folder with CTD files
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/CTD/CTD_Downcasts-edited")
CTDCombined <-
list.files(pattern = "*.csv") %>%
map_df(~read_csv(.))
loop through stations: interpolate to equal intervals and take mean
results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$salinity, method = "linear", n = 100))
summarize <- table %>%
summarise(
FactorValue = factor_value,
Smean = mean(y),
Smin = min(y),
Smax = max(y),
Ssd = sd(y)
)
results_list[[factor_value]] <- summarize
}
salinity <- bind_rows(results_list)
results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$Temp, method = "linear", n = 100))
summarize <- table %>%
summarise(
FactorValue = factor_value,
Tmean = mean(y),
Tmin = min(y),
Tmax = max(y),
Tsd = sd(y)
)
results_list[[factor_value]] <- summarize
}
Temp <- bind_rows(results_list)
results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$Oxygen, method = "linear", n = 100))
summarize <- table %>%
summarise(
FactorValue = factor_value,
Omean = mean(y),
Omin = min(y),
Omax = max(y),
Osd = sd(y)
)
results_list[[factor_value]] <- summarize
}
Oxygen <- bind_rows(results_list)
results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$pH, method = "linear", n = 100))
summarize <- table %>%
summarise(
FactorValue = factor_value,
pHmean = mean(y),
pHmin = min(y),
pHmax = max(y),
pHsd = sd(y)
)
results_list[[factor_value]] <- summarize
}
pH <- bind_rows(results_list)
results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$fluorescence, method = "linear", n = 100))
summarize <- table %>%
summarise(
FactorValue = factor_value,
Fmean = mean(y),
Fmin = min(y),
Fmax = max(y),
Fsd = sd(y)
)
results_list[[factor_value]] <- summarize
}
fluorescence <- bind_rows(results_list)
Env<-merge(salinity, Oxygen,by=c("FactorValue"))
Env1<-merge(Env,Temp,by=c("FactorValue"),all.x = TRUE)
Env2<-merge(Env1,pH,by=c("FactorValue"))
Env3<-merge(Env2, fluorescence,by=c("FactorValue"))
#rename station column
Env3 <- Env3 %>%
rename("Station" = "FactorValue")
recode to match database names
Env3$Station <-recode(Env3$Station, '2020_BUDD2s'='BUDD2s_2020', 'Budd1a'='BUDD1a', 'Budd2s'='BUDD2s','Budd3a'='BUDD3a','Budd4a'='BUDD4a')
Keep only means for each variable
colnames(Env3)
## [1] "Station" "Smean" "Smin" "Smax" "Ssd" "Omean" "Omin"
## [8] "Omax" "Osd" "Tmean" "Tmin" "Tmax" "Tsd" "pHmean"
## [15] "pHmin" "pHmax" "pHsd" "Fmean" "Fmin" "Fmax" "Fsd"
Env3 = subset(Env3, select = c(Station,Smean,Omean,Tmean,pHmean,Fmean))
subset
Field_DA<-subset(Field_SCQM, Species_lifestage_combined == "DITRICHOCORYCAEUS ANGLICUS")
#remove july station
Field_DA<-subset(Field_DA, Station_unique != "SC5a")
add up multiple lines per station
colnames(Field_DA)
## [1] "BugSampleID" "Project"
## [3] "Sample.Code" "Sampling.Group"
## [5] "Station" "Site"
## [7] "Site.Name" "Basin"
## [9] "Sub.Basin" "Latitude"
## [11] "Longitude" "Sample.Date"
## [13] "Sample.Year" "Sample.Month"
## [15] "Sample.Time" "Tow.Type"
## [17] "Mesh.Size" "Station.Depth..m."
## [19] "Flow.meter..revs." "Broad.Group"
## [21] "Mid.Level.Group" "X1st.Word.Taxa"
## [23] "Genus.species" "Life.History.Stage"
## [25] "Total.Ct" "Density....m3."
## [27] "Vol.Filtered..m3." "Jelly.Mass..g."
## [29] "Number.of.Jellies" "Trial.Time"
## [31] "Trial.Type" "Jelly.Size"
## [33] "Jelly.Density....m3." "Jelly.Density..g.m3."
## [35] "Location" "Species_lifestage"
## [37] "Species_lifestage_combined" "Station_unique"
## [39] "Jelly.Density"
Field_DA <- Field_DA %>%
group_by(Sample.Code,Station_unique,Sample.Year,Sample.Month,Site,Location) %>%
summarise(
copepod_density = sum(Density....m3.),
jelly_biomass = mean(Jelly.Density))
Field_DA$Station<-Field_DA$Station_unique
Field_DA <- merge(Field_DA,Env3,by.x = "Station")
Histograms of Jellyfish Density and Copepod Density
hist(Field_DA$copepod_density)
hist(Field_DA$jelly_biomass)
both have a log distribution
##investigate year to see if it is important
Field_DA$Sample.Year<-as.factor(Field_DA$Sample.Year)
DAYear<-glm(copepod_density~Sample.Year,data=Field_DA,family = Gamma(link="log"))
summary(DAYear)
##
## Call:
## glm(formula = copepod_density ~ Sample.Year, family = Gamma(link = "log"),
## data = Field_DA)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.4244 0.4307 12.595 4.72e-13 ***
## Sample.Year2021 0.4153 0.4919 0.844 0.406
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 1.298407)
##
## Null deviance: 46.485 on 29 degrees of freedom
## Residual deviance: 45.628 on 28 degrees of freedom
## AIC: 409.86
##
## Number of Fisher Scoring iterations: 7
plot(copepod_density~Sample.Year,data=Field_DA)
no effect of year
Plot Jellyfish Density against zooplankton density
ggplot(Field_DA, aes(x=jelly_biomass, y=copepod_density)) + geom_point()
#round to integers
Field_DA$copepod_density_round<-round(as.numeric(Field_DA$copepod_density), 0)
#run model
Field_DA_model1 <- glm.nb(copepod_density_round~jelly_biomass, data = Field_DA)
summary(Field_DA_model1)
##
## Call:
## glm.nb(formula = copepod_density_round ~ jelly_biomass, data = Field_DA,
## init.theta = 1.111750266, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.9853879 0.2097315 28.538 < 2e-16 ***
## jelly_biomass -0.0021745 0.0005351 -4.064 4.82e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.1118) family taken to be 1)
##
## Null deviance: 50.997 on 29 degrees of freedom
## Residual deviance: 34.159 on 28 degrees of freedom
## AIC: 396.18
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.112
## Std. Err.: 0.259
## Warning while fitting theta: alternation limit reached
##
## 2 x log-likelihood: -390.179
plot(Field_DA_model1)
Field_DA_model2<-glm(copepod_density~jelly_biomass, data= Field_DA, family="gaussian")
summary(Field_DA_model2)
##
## Call:
## glm(formula = copepod_density ~ jelly_biomass, family = "gaussian",
## data = Field_DA)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 432.1144 66.8466 6.464 5.3e-07 ***
## jelly_biomass -0.5203 0.1687 -3.085 0.00455 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 91871.49)
##
## Null deviance: 3446721 on 29 degrees of freedom
## Residual deviance: 2572402 on 28 degrees of freedom
## AIC: 431.91
##
## Number of Fisher Scoring iterations: 2
plot(Field_DA_model2)
Field_DA_model3<-glm(copepod_density~jelly_biomass, data= Field_DA, family="Gamma")
summary(Field_DA_model3)
##
## Call:
## glm(formula = copepod_density ~ jelly_biomass, family = "Gamma",
## data = Field_DA)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.700e-03 3.485e-04 4.877 3.88e-05 ***
## jelly_biomass 3.223e-05 7.279e-06 4.427 0.000132 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.5023986)
##
## Null deviance: 46.485 on 29 degrees of freedom
## Residual deviance: 21.186 on 28 degrees of freedom
## AIC: 383.12
##
## Number of Fisher Scoring iterations: 6
plot(Field_DA_model3)
Looks like NB QQ plot looks best
check for multicollinearity
DAData1 = subset(Field_DA, select = c(Fmean,pHmean,Smean,Tmean,Omean,jelly_biomass,copepod_density) )
pairs(DAData1 , panel = panel.smooth)
salinity and temperature have strong relationship
full model
DA1<-glm.nb(copepod_density_round~ jelly_biomass+Tmean + Fmean + Sample.Year, data=Field_DA, na.action="na.fail")
summary(DA1)
##
## Call:
## glm.nb(formula = copepod_density_round ~ jelly_biomass + Tmean +
## Fmean + Sample.Year, data = Field_DA, na.action = "na.fail",
## init.theta = 1.511147141, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 13.9099639 4.9891091 2.788 0.005302 **
## jelly_biomass -0.0016896 0.0004873 -3.467 0.000526 ***
## Tmean -0.5541199 0.3625481 -1.528 0.126412
## Fmean -0.0583690 0.0589615 -0.990 0.322198
## Sample.Year2021 0.5228110 0.4020302 1.300 0.193455
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.5111) family taken to be 1)
##
## Null deviance: 69.053 on 29 degrees of freedom
## Residual deviance: 33.252 on 25 degrees of freedom
## AIC: 391.14
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.511
## Std. Err.: 0.364
## Warning while fitting theta: alternation limit reached
##
## 2 x log-likelihood: -379.143
par(mfrow = c(2, 2))
plot(DA1)
AICc(DA1)
## [1] 394.7956
allperms <- dredge(DA1)
mod.table1 <- data.frame(allperms)
mod.table1
## X.Intercept. Fmean jelly_biomass Sample.Year Tmean df logLik
## 4 6.756106 -0.13051334 -0.001798873 <NA> NA 4 -191.2252
## 15 16.948053 NA -0.001714860 + -0.7972991 5 -190.1204
## 11 17.773118 NA -0.001582821 <NA> -0.8170713 4 -191.6196
## 12 13.229083 -0.08210967 -0.001592250 <NA> -0.4688598 5 -190.4033
## 8 6.391163 -0.12065700 -0.001893363 + NA 5 -190.7376
## 16 13.909805 -0.05836626 -0.001689436 + -0.5541143 6 -189.5717
## 3 5.985388 NA -0.002174485 <NA> NA 3 -195.0896
## 7 5.493106 NA -0.002282685 + NA 4 -194.0407
## 9 21.949045 NA NA <NA> -1.1160225 3 -196.2059
## 10 17.466513 -0.07398910 NA <NA> -0.7761812 4 -195.2736
## 13 21.225506 NA NA + -1.0926943 4 -195.6296
## 2 6.719369 -0.15608235 NA <NA> NA 3 -197.2278
## 14 17.763562 -0.06188874 NA + -0.8183373 5 -195.0544
## 6 6.598690 -0.15327516 NA + NA 4 -197.1869
## 1 5.757218 NA NA <NA> NA 2 -202.0601
## 5 5.424320 NA NA + NA 3 -201.7262
## AICc delta weight
## 4 392.0503 0.0000000 2.656570e-01
## 15 392.7408 0.6904930 1.880973e-01
## 11 392.8392 0.7888693 1.790690e-01
## 12 393.3066 1.2563167 1.417475e-01
## 8 393.9752 1.9249205 1.014682e-01
## 16 394.7956 2.7452510 6.732828e-02
## 3 397.1023 5.0519319 2.124752e-02
## 7 397.6815 5.6311479 1.590498e-02
## 9 399.3350 7.2846309 6.957989e-03
## 10 400.1472 8.0968490 4.635673e-03
## 13 400.8591 8.8087879 3.247261e-03
## 2 401.3787 9.3283307 2.504378e-03
## 14 402.6087 10.5584068 1.353918e-03
## 6 403.9738 11.9234666 6.841847e-04
## 1 408.5646 16.5142485 6.891246e-05
## 5 410.3755 18.3251351 2.786562e-05
best model includes only fluorescence and jelly biomass
DA_final<-glm.nb(copepod_density_round~ Fmean+jelly_biomass, data=Field_DA, na.action="na.fail")
summary(DA_final)
##
## Call:
## glm.nb(formula = copepod_density_round ~ Fmean + jelly_biomass,
## data = Field_DA, na.action = "na.fail", init.theta = 1.375328852,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.7561063 0.3632559 18.599 < 2e-16 ***
## Fmean -0.1305133 0.0453898 -2.875 0.004035 **
## jelly_biomass -0.0017989 0.0004934 -3.646 0.000267 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.3753) family taken to be 1)
##
## Null deviance: 62.928 on 29 degrees of freedom
## Residual deviance: 33.491 on 27 degrees of freedom
## AIC: 390.45
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.375
## Std. Err.: 0.328
##
## 2 x log-likelihood: -382.450
plot(DA_final)
AICc(DA_final)
## [1] 392.0503
#mcfadden's rsquared
with(summary(DA_final), 1 - deviance/null.deviance)
## [1] 0.4677958
extract rate of change estimates with confidence intervals
exp(coef(DA_final))
## (Intercept) Fmean jelly_biomass
## 859.2898337 0.8776448 0.9982027
exp(confint(DA_final, level=.95))
## 2.5 % 97.5 %
## (Intercept) 456.9979807 1735.7605222
## Fmean 0.8092034 0.9559715
## jelly_biomass 0.9974997 0.9990376
#get rate of change (1-exp(coefficient))
1-0.9982027
## [1] 0.0017973
Calculate rate of change another way (to verify)
a<-exp(-0.0017989)
a
## [1] 0.9982027
b<-a-1
c<-b*100
c
## [1] -0.1797283
ANOVA test for type II variance
ANOVADA<-car::Anova(DA_final, type = 2)
summary(ANOVADA)
## LR Chisq Df Pr(>Chisq)
## Min. : 8.612 Min. :1 Min. :0.0001621
## 1st Qu.:10.016 1st Qu.:1 1st Qu.:0.0009562
## Median :11.420 Median :1 Median :0.0017504
## Mean :11.420 Mean :1 Mean :0.0017504
## 3rd Qu.:12.823 3rd Qu.:1 3rd Qu.:0.0025446
## Max. :14.227 Max. :1 Max. :0.0033388
ANOVADA
## Analysis of Deviance Table (Type II tests)
##
## Response: copepod_density_round
## LR Chisq Df Pr(>Chisq)
## Fmean 8.6124 1 0.0033388 **
## jelly_biomass 14.2267 1 0.0001621 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Predict values for jelly density vs. copepod densty plot
predict1<-ggpredict(
DA_final,
terms=c("jelly_biomass"),
ci.lvl = 0.95,
type = "fe",
typical = "mean",
condition = NULL,
back.transform = TRUE,
ppd = FALSE,
vcov.fun = NULL,
vcov.type = NULL,
vcov.args = NULL,
interval = "confidence")
subset
Field_Cal<-subset(Field_SCQM, Species_lifestage_combined == "CALANOIDA_Medium")
#remove july station
Field_Cal<-subset(Field_Cal, Station_unique != "SC5a")
add up multiple lines per station
colnames(Field_Cal)
## [1] "BugSampleID" "Project"
## [3] "Sample.Code" "Sampling.Group"
## [5] "Station" "Site"
## [7] "Site.Name" "Basin"
## [9] "Sub.Basin" "Latitude"
## [11] "Longitude" "Sample.Date"
## [13] "Sample.Year" "Sample.Month"
## [15] "Sample.Time" "Tow.Type"
## [17] "Mesh.Size" "Station.Depth..m."
## [19] "Flow.meter..revs." "Broad.Group"
## [21] "Mid.Level.Group" "X1st.Word.Taxa"
## [23] "Genus.species" "Life.History.Stage"
## [25] "Total.Ct" "Density....m3."
## [27] "Vol.Filtered..m3." "Jelly.Mass..g."
## [29] "Number.of.Jellies" "Trial.Time"
## [31] "Trial.Type" "Jelly.Size"
## [33] "Jelly.Density....m3." "Jelly.Density..g.m3."
## [35] "Location" "Species_lifestage"
## [37] "Species_lifestage_combined" "Station_unique"
## [39] "Jelly.Density"
Field_Cal <- Field_Cal %>%
group_by(Sample.Code,Station_unique,Sample.Year,Sample.Month,Site,Location) %>%
summarise(
copepod_density = sum(Density....m3.),
jelly_biomass = mean(Jelly.Density))
Field_Cal$Station<-Field_Cal$Station_unique
Field_Cal <- merge(Field_Cal,Env3,by.x = "Station")
Histograms of Jellyfish Density and Copepod Density
hist(Field_Cal$copepod_density)
hist(Field_Cal$jelly_biomass)
only jelly biomass has abnormal distribution
##investigate year to see if it is important
Field_Cal$Sample.Year<-as.factor(Field_Cal$Sample.Year)
CalYear<-glm(copepod_density~Sample.Year,data=Field_Cal,family = Gamma(link="log"))
summary(CalYear)
##
## Call:
## glm(formula = copepod_density ~ Sample.Year, family = Gamma(link = "log"),
## data = Field_Cal)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5821 0.2394 27.495 < 2e-16 ***
## Sample.Year2021 0.7610 0.2734 2.783 0.00953 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.401164)
##
## Null deviance: 13.782 on 29 degrees of freedom
## Residual deviance: 11.093 on 28 degrees of freedom
## AIC: 481.97
##
## Number of Fisher Scoring iterations: 5
plot(copepod_density~Sample.Year,data=Field_Cal)
Year is significant - potentially include in model
Plot Jellyfish Density against zooplankton density
ggplot(Field_Cal, aes(x=jelly_biomass, y=copepod_density)) + geom_point()
#round to integers
Field_Cal$copepod_density_round<-round(as.numeric(Field_Cal$copepod_density), 0)
#run model
Field_Cal_model1 <- glm.nb(copepod_density_round~jelly_biomass, data = Field_Cal)
summary(Field_Cal_model1)
##
## Call:
## glm.nb(formula = copepod_density_round ~ jelly_biomass, data = Field_Cal,
## init.theta = 2.643917945, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.3428565 0.1357612 54.087 <2e-16 ***
## jelly_biomass -0.0007212 0.0003428 -2.104 0.0354 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(2.6439) family taken to be 1)
##
## Null deviance: 36.292 on 29 degrees of freedom
## Residual deviance: 31.882 on 28 degrees of freedom
## AIC: 484.69
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 2.644
## Std. Err.: 0.647
##
## 2 x log-likelihood: -478.688
plot(Field_Cal_model1)
### Gamma distribution
Field_Cal_model3<-glm(copepod_density~jelly_biomass, data= Field_Cal, family="Gamma")
summary(Field_Cal_model3)
##
## Call:
## glm(formula = copepod_density ~ jelly_biomass, family = "Gamma",
## data = Field_Cal)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.242e-04 7.461e-05 8.366 4.22e-09 ***
## jelly_biomass 7.580e-07 3.205e-07 2.365 0.0252 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.2560637)
##
## Null deviance: 13.782 on 29 degrees of freedom
## Residual deviance: 11.908 on 28 degrees of freedom
## AIC: 484.23
##
## Number of Fisher Scoring iterations: 5
plot(Field_Cal_model3)
These are about the same, go with NB
check for multicollinearity
Field_Cal1 = subset(Field_Cal, select = c(Fmean,pHmean,Smean,Tmean,Omean,jelly_biomass,copepod_density) )
pairs(Field_Cal1 , panel = panel.smooth)
fluorescence and salinity are correlated, exclude salinity
full model
Cal1<-glm.nb(copepod_density_round~ jelly_biomass+Tmean + Fmean + Sample.Year, data=Field_Cal, na.action="na.fail")
summary(Cal1)
##
## Call:
## glm.nb(formula = copepod_density_round ~ jelly_biomass + Tmean +
## Fmean + Sample.Year, data = Field_Cal, na.action = "na.fail",
## init.theta = 3.63222859, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 10.4256523 3.2124752 3.245 0.001173 **
## jelly_biomass -0.0006533 0.0003096 -2.110 0.034837 *
## Tmean -0.2677631 0.2334407 -1.147 0.251370
## Fmean 0.0109781 0.0379493 0.289 0.772365
## Sample.Year2021 0.9155822 0.2584031 3.543 0.000395 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(3.6322) family taken to be 1)
##
## Null deviance: 49.790 on 29 degrees of freedom
## Residual deviance: 31.401 on 25 degrees of freedom
## AIC: 480.21
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 3.632
## Std. Err.: 0.903
##
## 2 x log-likelihood: -468.210
par(mfrow = c(2, 2))
plot(Cal1)
AICc(Cal1)
## [1] 483.8626
qq plot looks good
Make model comparison table
allperms <- dredge(Cal1)
mod.table2 <- data.frame(allperms)
mod.table2
## X.Intercept. Fmean jelly_biomass Sample.Year Tmean df logLik
## 7 6.689247 NA -0.0007878746 + NA 4 -234.8619
## 15 9.810444 NA -0.0006522665 + -0.21882447 5 -234.1497
## 8 6.810121 -0.016363531 -0.0007414513 + NA 5 -234.6938
## 13 11.436548 NA NA + -0.33812659 4 -236.2037
## 5 6.582223 NA NA + NA 3 -237.9470
## 16 10.425590 0.010979081 -0.0006533204 + -0.26775978 6 -234.1052
## 6 6.822730 -0.031375103 NA + NA 4 -237.3420
## 14 11.989432 0.009657259 NA + -0.38197316 5 -236.1703
## 3 7.342857 NA -0.0007212024 <NA> NA 3 -239.3438
## 1 7.210375 NA NA <NA> NA 2 -241.4159
## 4 7.513210 -0.025610856 -0.0006582708 <NA> NA 4 -239.0297
## 11 8.321136 NA -0.0006774798 <NA> -0.06723145 4 -239.2958
## 2 7.474948 -0.037281875 NA <NA> NA 3 -240.7373
## 9 10.265098 NA NA <NA> -0.20821840 3 -240.9165
## 12 6.803637 -0.030211501 -0.0006799857 <NA> 0.05086844 5 -239.0114
## 10 8.807967 -0.028047495 NA <NA> -0.09533199 4 -240.6711
## AICc delta weight
## 7 479.3238 0.000000 0.391001642
## 15 480.7993 1.475534 0.186969379
## 8 481.8875 2.563735 0.108510224
## 13 482.0074 2.683541 0.102200963
## 5 482.8171 3.493336 0.068172656
## 16 483.8626 4.538798 0.040419508
## 6 484.2841 4.960277 0.032739204
## 14 484.8406 5.516830 0.024786439
## 3 485.6107 6.286871 0.016865602
## 1 487.2763 7.952525 0.007333474
## 4 487.6594 8.335578 0.006055233
## 11 488.1915 8.867712 0.004640656
## 2 488.3977 9.073892 0.004186085
## 9 488.7560 9.432180 0.003499506
## 12 490.5229 11.199077 0.001446538
## 10 490.9423 11.618480 0.001172891
jelly biomass and sample year are the most important
Cal_final<-glm.nb(copepod_density_round~ jelly_biomass+Sample.Year, data=Field_Cal, na.action="na.fail")
summary(Cal_final)
##
## Call:
## glm.nb(formula = copepod_density_round ~ jelly_biomass + Sample.Year,
## data = Field_Cal, na.action = "na.fail", init.theta = 3.46768619,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.6892460 0.2123501 31.501 < 2e-16 ***
## jelly_biomass -0.0007879 0.0002997 -2.629 0.008559 **
## Sample.Year2021 0.8070946 0.2324610 3.472 0.000517 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(3.4677) family taken to be 1)
##
## Null deviance: 47.545 on 29 degrees of freedom
## Residual deviance: 31.466 on 27 degrees of freedom
## AIC: 477.72
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 3.468
## Std. Err.: 0.860
##
## 2 x log-likelihood: -469.724
plot(Cal_final)
AICc(Cal_final)
## [1] 479.3238
#mcfadden's rsquared
with(summary(Cal_final), 1 - deviance/null.deviance)
## [1] 0.3381865
extract estimates for tables, betas, variance-covariance matrix
exp(coef(Cal_final))
## (Intercept) jelly_biomass Sample.Year2021
## 803.7160057 0.9992124 2.2413864
exp(confint(Cal_final, level=.95))
## 2.5 % 97.5 %
## (Intercept) 546.9442580 1242.6299326
## jelly_biomass 0.9987005 0.9997918
## Sample.Year2021 1.3903831 3.4843010
1-0.9992124
## [1] 0.0007876
ANOVA test for type II variance
ANOVADA<-car::Anova(Cal_final, type = 2)
summary(ANOVADA)
## LR Chisq Df Pr(>Chisq)
## Min. : 6.787 Min. :1 Min. :0.001331
## 1st Qu.: 7.665 1st Qu.:1 1st Qu.:0.003294
## Median : 8.543 Median :1 Median :0.005257
## Mean : 8.543 Mean :1 Mean :0.005257
## 3rd Qu.: 9.421 3rd Qu.:1 3rd Qu.:0.007221
## Max. :10.299 Max. :1 Max. :0.009184
ANOVADA
## Analysis of Deviance Table (Type II tests)
##
## Response: copepod_density_round
## LR Chisq Df Pr(>Chisq)
## jelly_biomass 6.7868 1 0.009184 **
## Sample.Year 10.2988 1 0.001331 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Predict values for jelly density vs. copepod densty plot
predict2<-ggpredict(
Cal_final,
terms=c("jelly_biomass","Sample.Year"),
ci.lvl = 0.95,
type = "fe",
typical = "mean",
condition = NULL,
back.transform = TRUE,
ppd = FALSE,
vcov.fun = NULL,
vcov.type = NULL,
vcov.args = NULL,
interval = "confidence")
DitrichocorycaeusPlot<-plot(predict1,add.data = TRUE,dot.size=1.5,dot.alpha=0.65)+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic() +
scale_x_continuous(breaks=seq(0,1400,350),expand = c(0, 0), limits = c(-10, 1400)) +
scale_y_continuous(breaks=seq(0,1200,400),expand = c(0, 0), limits = c(-10, 1200))+
geom_line(size=1.5) +
geom_ribbon( aes(ymin = conf.low, ymax = conf.high), alpha = .15)+ xlab(expression(italic('A. labiata')~ plain('Biomass ( g /' ~m^3~ ')')))+
ylab(expression(italic('D. anglicus') ~ plain('Density') ~ plain('( # /' ~ m^3 ~ ')')))+
theme(axis.text=element_text(size=15,colour="black"),
legend.position=c(0.87, 0.8),
legend.text=element_text(size=15,colour="black"),
legend.title=element_text(size=15,colour="black"),
axis.title=element_text(size=20,colour="black"),
axis.line = element_line(colour = "black"))+theme(plot.title = element_blank())+
theme(plot.margin=margin(0.7, 0.7, 0.7, 0.7, unit = "cm"))+
annotate("text",x=1300, y=1150, label= "a",size=10)
DitrichocorycaeusPlot
CalanoidPlot<-plot(predict2,add.data = TRUE,dot.size=1.5,dot.alpha=0.65)+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic() +
scale_x_continuous(breaks=seq(0,1400,350),expand = c(0,0), limits = c(-10, 1400)) +
scale_y_continuous(breaks=seq(0,3000,500),expand = c(0,0), limits = c(-10, 3000))+
geom_line(size=1.5) +
geom_ribbon( aes(ymin = conf.low, ymax = conf.high), alpha = .15)+ xlab(expression(italic('A. labiata')~ plain('Biomass ( g /' ~m^3~ ')')))+
ylab(expression(plain('Medium Calanoid Density') ~ plain('( # /' ~ m^3 ~ ')')))+
theme(axis.text=element_text(size=15,colour="black"),
legend.position=c(0.83, 0.7),
legend.text=element_text(size=15,colour="black"),
legend.title=element_text(size=15,colour="black"),
axis.title=element_text(size=20,colour="black"),
axis.line = element_line(colour = "black"))+theme(plot.title = element_blank())+
theme(plot.margin=margin(0.7, 0.7, 0.7, 0.7, unit = "cm"))+labs(col="Year",title=NULL)+
annotate("text",x=1300, y=2800, label= "b",size=10)
CalanoidPlot
Field_GLM<-ggarrange(DitrichocorycaeusPlot,CalanoidPlot,
ncol = 2, nrow = 1)
Field_GLM
save plot
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Field_GLM.png", plot = Field_GLM, height = 164, width = 328, units="mm", device='png', dpi=600)
ggsave(filename = "Field_GLM.tif", plot = Field_GLM, height = 164, width = 328, units="mm", device='tiff', dpi=600)
combine tables
mod.table1$Taxon<-"Ditrichocorycaeus anglicus"
mod.table2$Taxon<-"Medium Calanoid"
Field_model_table<-rbind(mod.table1,mod.table2)
remove excess columns and rename columns
colnames(Field_model_table)
## [1] "X.Intercept." "Fmean" "jelly_biomass" "Sample.Year"
## [5] "Tmean" "df" "logLik" "AICc"
## [9] "delta" "weight" "Taxon"
Field_model_table <- Field_model_table[ -c(1,7,8) ]
colnames(Field_model_table)
## [1] "Fmean" "jelly_biomass" "Sample.Year" "Tmean"
## [5] "df" "delta" "weight" "Taxon"
#reorder columns
Field_model_table <- Field_model_table[, c(8,2,1,3,4,5,6,7)]
Field_model_table <- Field_model_table %>%
rename("Jellyfish Biomass" = "jelly_biomass",
"Chla" = "Fmean",
"Year" = "Sample.Year",
"Temperature" = "Tmean",
"Aikake Weight" = "weight")
save model list
write.csv(Field_model_table,"/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output/Field_model_table.csv", row.names = FALSE)