This code fits generalized linear models (GLMs) to field counts of Ditrichocorycaeus anglicus and medium calnaoids to determine the predictors affecting their probability of abundance. Coefficients were calculated and reported according to recommendations from this paper: https://psycnet.apa.org/record/2021-40400-001

The report is located here: https://rpubs.com/HailaSchultz/field_GLM

load packages

library(MASS)
library(MuMIn)
library(car)
library(ggeffects)
library(dplyr)
library(forcats)
library(stringr)
library(tidyverse)
library(ggpubr)

Data Prep

subset database to field stations

Read database into R. If current database changes, just change the path

Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")

Subset to field stations and Quartermaster and Sinclair Inlet. Budd and Eld Inlet are excluded because they didn’t have jellyfish populations. subset data

#select only field data
Field_data<-subset(Database,Trial.Type=="Field")
#subset to QM and SC
Field_SCQM<-subset(Field_data, Site %in% c("QM","SC"))

Combine taxa

recode copepod names

Field_SCQM$Species_lifestage <- paste(Field_SCQM$Genus.species, Field_SCQM$Life.History.Stage, sep="_")
Field_SCQM <- Field_SCQM %>%
                   mutate(Species_lifestage_combined = fct_recode(Species_lifestage, 
                                     "CALANOIDA_Medium" = "ACARTIA_Copepodite", 
                                     "CALANOIDA_Medium" = "ACARTIA_Female, Adult",
                                     "CALANUS PACIFICUS" = "CALANUS PACIFICUS_C5-adult",
                                     "CALANUS PACIFICUS" = "CALANUS PACIFICUS_C5-Adult",
                                     "CALANUS PACIFICUS" = "CALANUS PACIFICUS_Copepodite",
                                     "CALANUS PACIFICUS" = "CALANUS PACIFICUS_Female, Adult",
                                     "CALANUS PACIFICUS" = "CALANUS PACIFICUS_Male, Adult",
                                     "DITRICHOCORYCAEUS ANGLICUS" = "DITRICHOCORYCAEUS ANGLICUS_Large",
                                     "DITRICHOCORYCAEUS ANGLICUS" = "DITRICHOCORYCAEUS ANGLICUS_Small"))
unique(Field_SCQM$Species_lifestage_combined)

##  [1] CALANOIDA_Medium                  AETIDEUS_Copepodite              
##  [3] AETIDEUS_Female, Adult            AURELIA LABIATA_Ephyra           
##  [5] BARNACLES_Cyprid larva            BARNACLES_Nauplius               
##  [7] BIVALVIA_Veliger                  BRACHYURA_Unknown                
##  [9] BRYOZOA_Cyphonaut                 CALANOIDA_Copepodite             
## [11] CALANOIDA_Large                   CALANOIDA_Small                  
## [13] CALANUS PACIFICUS                 CENTROPAGES_Female, Adult        
## [15] Chaet/Euphaus Egg_Egg             CHAETOGNATHA_Unknown             
## [17] CLADOCERA_Unknown                 CLYTIA GREGARIA_Medusa           
## [19] COPEPODA_Egg                      COPEPODA_Nauplius                
## [21] CRABS_Megalopa                    CRABS_Zoea                       
## [23] Diatom-Centric_Unknown            DITRICHOCORYCAEUS ANGLICUS       
## [25] ECHINODERMATA_Pluteus larva       EUPHAUSIIDAE_Calyptopis          
## [27] EUPHAUSIIDAE_Furcilia             EUPHAUSIIDAE_Nauplius            
## [29] EUTONINA INDICANS_Medusa          FISH_Egg                         
## [31] FISH_Larva                        GAETANUS_Adult                   
## [33] GAMMARIDEA_Unknown                GASTROPODA_Veliger               
## [35] HARPACTICOIDA_Unknown             HYPERIIDEA_Unknown               
## [37] Insecta_Unknown                   ISOPODA_Unknown                  
## [39] JELLYFISHES_Ephyra                JELLYFISHES_Medusa               
## [41] JELLYFISHES_Planula larvae        LARVACEA_Unknown                 
## [43] LITTORINA_Egg                     METRIDIA_Female, Adult           
## [45] MOLLUSCA_Trochophore larva        Nemertea_Pilidium                
## [47] NEOTRYPAEA CALIFORNIENSIS_Unknown NOCTILUCA_Unknown                
## [49] OITHONA_Unknown                   POLYCHAETA_Unknown               
## [51] SHRIMP_Unknown                    SIPHONOPHORA_Bract               
## [53] SIPHONOPHORA_Gonophore            SIPHONOPHORA_Unknown             
## [55] UNKNOWN_Larva                    
## 55 Levels: CALANOIDA_Medium AETIDEUS_Copepodite ... UNKNOWN_Larva

Deal with duplicate station names

# combine station and date for unique stations
Field_SCQM$Station_unique <- paste(Field_SCQM$Station, Field_SCQM$Sample.Date)
unique(Field_SCQM$Station_unique)

##  [1] "QM1 08/28/2019"  "SC4 09/25/2019"  "SC2 09/25/2019"  "SC3 09/25/2019" 
##  [5] "SC1 09/25/2019"  "QM10 08/24/2021" "QM5b 07/29/2021" "QM6b 07/29/2021"
##  [9] "QM2c 08/22/2021" "SC11 08/26/2021" "SC5c 08/25/2021" "SC6c 08/25/2021"
## [13] "SC1a 07/27/2021" "SC4a 07/27/2021" "SC16 08/26/2021" "SC10 08/26/2021"
## [17] "SC7 08/25/2021"  "QM1b 07/29/2021" "SC2c 08/24/2021" "QM1c 08/22/2021"
## [21] "SC14 08/26/2021" "SC3a 07/27/2021" "QM9 08/23/2021"  "SC5d 08/25/2021"
## [25] "QM2b 07/28/2021" "SC3c 08/24/2021" "QM6c 08/22/2021" "QM5c 08/22/2021"
## [29] "SC17 08/27/2021" "QM3c 08/22/2021" "SC5 09/25/2019"  "QM6 08/28/2019" 
## [33] "QM5 08/28/2019"  "QM2 08/28/2019"  "SC6 09/25/2019"  "QM3 08/28/2019" 
## [37] "QM4 08/28/2019"  "QM3a 08/29/2020" "QM1a 08/28/2020" "SC2a 07/27/2021"
## [41] "QM5a 08/29/2020" "QM4a 08/29/2020" "SC8 08/25/2021"  "SC12 08/26/2021"
## [45] "SC4c 08/25/2021" "SC6a 07/27/2021" "QM11 08/24/2021" "QM4b 07/29/2021"
## [49] "QM12 08/24/2021" "SC5a 07/27/2021" "QM3b 07/29/2021" "QM2 07/24/2019" 
## [53] "QM6 07/24/2019"  "QM1 07/24/2019"  "QM2a 08/28/2020" "QM8a 08/29/2020"
## [57] "QM3 07/24/2019"  "QM5 07/24/2019"  "QM4 07/24/2019"  "QM7a 08/29/2020"
## [61] "QM4c 08/22/2021"

Field_SCQM$Station_unique <-recode(Field_SCQM$Station_unique, 
                                 'QM1 07/24/2019'='QM1s 07/24/2019',
                                 'QM2 07/24/2019'='QM2s 07/24/2019',
                                 'QM3 07/24/2019'='QM3s 07/24/2019',
                                 'QM4 07/24/2019'='QM4s 07/24/2019',
                                 'QM5 07/24/2019'='QM5s 07/24/2019',
                                 'QM6 07/24/2019'='QM6s 07/24/2019',
                                 )
#remove date
Field_SCQM$Station_unique<-str_sub(Field_SCQM$Station_unique, end=-12)
unique(Field_SCQM$Station_unique)

##  [1] "QM1"  "SC4"  "SC2"  "SC3"  "SC1"  "QM10" "QM5b" "QM6b" "QM2c" "SC11"
## [11] "SC5c" "SC6c" "SC1a" "SC4a" "SC16" "SC10" "SC7"  "QM1b" "SC2c" "QM1c"
## [21] "SC14" "SC3a" "QM9"  "SC5d" "QM2b" "SC3c" "QM6c" "QM5c" "SC17" "QM3c"
## [31] "SC5"  "QM6"  "QM5"  "QM2"  "SC6"  "QM3"  "QM4"  "QM3a" "QM1a" "SC2a"
## [41] "QM5a" "QM4a" "SC8"  "SC12" "SC4c" "SC6a" "QM11" "QM4b" "QM12" "SC5a"
## [51] "QM3b" "QM2s" "QM6s" "QM1s" "QM2a" "QM8a" "QM3s" "QM5s" "QM4s" "QM7a"
## [61] "QM4c"

Deal with Jelly density

#convert sample year to a factor
Field_SCQM$Sample.Year<- as.factor(Field_SCQM$Sample.Year)
#remove commas from jelly density
Field_SCQM$Jelly.Density <- as.numeric(gsub(",","",Field_SCQM$Jelly.Density..g.m3.))
#add 1 to jelly  density for analyses that prohibit zeroes
Field_SCQM$Jelly.Density <-Field_SCQM$Jelly.Density+1
colnames(Field_SCQM)

##  [1] "BugSampleID"                "Project"                   
##  [3] "Sample.Code"                "Sampling.Group"            
##  [5] "Station"                    "Site"                      
##  [7] "Site.Name"                  "Basin"                     
##  [9] "Sub.Basin"                  "Latitude"                  
## [11] "Longitude"                  "Sample.Date"               
## [13] "Sample.Year"                "Sample.Month"              
## [15] "Sample.Time"                "Tow.Type"                  
## [17] "Mesh.Size"                  "Station.Depth..m."         
## [19] "Flow.meter..revs."          "Broad.Group"               
## [21] "Mid.Level.Group"            "X1st.Word.Taxa"            
## [23] "Genus.species"              "Life.History.Stage"        
## [25] "Total.Ct"                   "Density....m3."            
## [27] "Vol.Filtered..m3."          "Jelly.Mass..g."            
## [29] "Number.of.Jellies"          "Trial.Time"                
## [31] "Trial.Type"                 "Jelly.Size"                
## [33] "Jelly.Density....m3."       "Jelly.Density..g.m3."      
## [35] "Location"                   "Species_lifestage"         
## [37] "Species_lifestage_combined" "Station_unique"            
## [39] "Jelly.Density"

Environmental Data

some of this code is similar to 13_filed_CTD-table

import CTD downcasts into one file

#set working directory to folder with CTD files
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/CTD/CTD_Downcasts-edited")

CTDCombined <-
    list.files(pattern = "*.csv") %>% 
    map_df(~read_csv(.))

Calculate means per station

salinity

loop through stations: interpolate to equal intervals and take mean

results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
  subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
   table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$salinity, method = "linear", n = 100))
    summarize <- table %>%
    summarise(
      FactorValue = factor_value,
      Smean = mean(y), 
      Smin = min(y),
      Smax = max(y),
      Ssd = sd(y)
    )
    results_list[[factor_value]] <- summarize
}

salinity <- bind_rows(results_list)

Temperature

results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
  subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
   table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$Temp, method = "linear", n = 100))
    summarize <- table %>%
    summarise(
      FactorValue = factor_value,
      Tmean = mean(y), 
      Tmin = min(y),
      Tmax = max(y),
      Tsd = sd(y)
    )
    results_list[[factor_value]] <- summarize
}

Temp <- bind_rows(results_list)

Oxygen

results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
  subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
   table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$Oxygen, method = "linear", n = 100))
    summarize <- table %>%
    summarise(
      FactorValue = factor_value,
      Omean = mean(y), 
      Omin = min(y),
      Omax = max(y),
      Osd = sd(y)
    )
    results_list[[factor_value]] <- summarize
}

Oxygen <- bind_rows(results_list)

pH

results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
  subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
   table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$pH, method = "linear", n = 100))
    summarize <- table %>%
    summarise(
      FactorValue = factor_value,
      pHmean = mean(y), 
      pHmin = min(y),
      pHmax = max(y),
      pHsd = sd(y)
    )
    results_list[[factor_value]] <- summarize
}

pH <- bind_rows(results_list)

fluorescence

results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
  subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
   table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$fluorescence, method = "linear", n = 100))
    summarize <- table %>%
    summarise(
      FactorValue = factor_value,
      Fmean = mean(y), 
      Fmin = min(y),
      Fmax = max(y),
      Fsd = sd(y)
    )
    results_list[[factor_value]] <- summarize
}

fluorescence <- bind_rows(results_list)

combine dataframes

Env<-merge(salinity, Oxygen,by=c("FactorValue"))
Env1<-merge(Env,Temp,by=c("FactorValue"),all.x = TRUE)
Env2<-merge(Env1,pH,by=c("FactorValue"))
Env3<-merge(Env2, fluorescence,by=c("FactorValue"))
#rename station column
Env3 <- Env3 %>% 
       rename("Station" = "FactorValue")

recode to match database names

Env3$Station <-recode(Env3$Station, '2020_BUDD2s'='BUDD2s_2020', 'Budd1a'='BUDD1a', 'Budd2s'='BUDD2s','Budd3a'='BUDD3a','Budd4a'='BUDD4a')

Keep only means for each variable

colnames(Env3)

##  [1] "Station" "Smean"   "Smin"    "Smax"    "Ssd"     "Omean"   "Omin"   
##  [8] "Omax"    "Osd"     "Tmean"   "Tmin"    "Tmax"    "Tsd"     "pHmean" 
## [15] "pHmin"   "pHmax"   "pHsd"    "Fmean"   "Fmin"    "Fmax"    "Fsd"

Env3 = subset(Env3, select = c(Station,Smean,Omean,Tmean,pHmean,Fmean))

Ditrichocorycaeus anglicus

subset

Field_DA<-subset(Field_SCQM, Species_lifestage_combined == "DITRICHOCORYCAEUS ANGLICUS")
#remove july station
Field_DA<-subset(Field_DA, Station_unique != "SC5a")

add up multiple lines per station

colnames(Field_DA)

##  [1] "BugSampleID"                "Project"                   
##  [3] "Sample.Code"                "Sampling.Group"            
##  [5] "Station"                    "Site"                      
##  [7] "Site.Name"                  "Basin"                     
##  [9] "Sub.Basin"                  "Latitude"                  
## [11] "Longitude"                  "Sample.Date"               
## [13] "Sample.Year"                "Sample.Month"              
## [15] "Sample.Time"                "Tow.Type"                  
## [17] "Mesh.Size"                  "Station.Depth..m."         
## [19] "Flow.meter..revs."          "Broad.Group"               
## [21] "Mid.Level.Group"            "X1st.Word.Taxa"            
## [23] "Genus.species"              "Life.History.Stage"        
## [25] "Total.Ct"                   "Density....m3."            
## [27] "Vol.Filtered..m3."          "Jelly.Mass..g."            
## [29] "Number.of.Jellies"          "Trial.Time"                
## [31] "Trial.Type"                 "Jelly.Size"                
## [33] "Jelly.Density....m3."       "Jelly.Density..g.m3."      
## [35] "Location"                   "Species_lifestage"         
## [37] "Species_lifestage_combined" "Station_unique"            
## [39] "Jelly.Density"

Field_DA <- Field_DA %>%
  group_by(Sample.Code,Station_unique,Sample.Year,Sample.Month,Site,Location) %>%
  summarise(
    copepod_density = sum(Density....m3.),
    jelly_biomass = mean(Jelly.Density))

Add environmental table to copepod table

Field_DA$Station<-Field_DA$Station_unique
Field_DA <- merge(Field_DA,Env3,by.x = "Station")

Examine Data

Histograms of Jellyfish Density and Copepod Density

hist(Field_DA$copepod_density)

hist(Field_DA$jelly_biomass)

both have a log distribution

##investigate year to see if it is important

Field_DA$Sample.Year<-as.factor(Field_DA$Sample.Year)
DAYear<-glm(copepod_density~Sample.Year,data=Field_DA,family = Gamma(link="log"))
summary(DAYear)

## 
## Call:
## glm(formula = copepod_density ~ Sample.Year, family = Gamma(link = "log"), 
##     data = Field_DA)
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.4244     0.4307  12.595 4.72e-13 ***
## Sample.Year2021   0.4153     0.4919   0.844    0.406    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 1.298407)
## 
##     Null deviance: 46.485  on 29  degrees of freedom
## Residual deviance: 45.628  on 28  degrees of freedom
## AIC: 409.86
## 
## Number of Fisher Scoring iterations: 7

plot(copepod_density~Sample.Year,data=Field_DA)

no effect of year

Plot Jellyfish Density against zooplankton density

ggplot(Field_DA, aes(x=jelly_biomass, y=copepod_density)) + geom_point()

Family Options

Negative Binomial Distribution

#round to integers
Field_DA$copepod_density_round<-round(as.numeric(Field_DA$copepod_density), 0)
#run model
Field_DA_model1 <- glm.nb(copepod_density_round~jelly_biomass, data = Field_DA)
summary(Field_DA_model1)

## 
## Call:
## glm.nb(formula = copepod_density_round ~ jelly_biomass, data = Field_DA, 
##     init.theta = 1.111750266, link = log)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    5.9853879  0.2097315  28.538  < 2e-16 ***
## jelly_biomass -0.0021745  0.0005351  -4.064 4.82e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.1118) family taken to be 1)
## 
##     Null deviance: 50.997  on 29  degrees of freedom
## Residual deviance: 34.159  on 28  degrees of freedom
## AIC: 396.18
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.112 
##           Std. Err.:  0.259 
## Warning while fitting theta: alternation limit reached 
## 
##  2 x log-likelihood:  -390.179

plot(Field_DA_model1)

Gaussian distribution

Field_DA_model2<-glm(copepod_density~jelly_biomass, data= Field_DA, family="gaussian")
summary(Field_DA_model2)

## 
## Call:
## glm(formula = copepod_density ~ jelly_biomass, family = "gaussian", 
##     data = Field_DA)
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   432.1144    66.8466   6.464  5.3e-07 ***
## jelly_biomass  -0.5203     0.1687  -3.085  0.00455 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 91871.49)
## 
##     Null deviance: 3446721  on 29  degrees of freedom
## Residual deviance: 2572402  on 28  degrees of freedom
## AIC: 431.91
## 
## Number of Fisher Scoring iterations: 2

plot(Field_DA_model2)

Gamma distribution

Field_DA_model3<-glm(copepod_density~jelly_biomass, data= Field_DA, family="Gamma")
summary(Field_DA_model3)

## 
## Call:
## glm(formula = copepod_density ~ jelly_biomass, family = "Gamma", 
##     data = Field_DA)
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.700e-03  3.485e-04   4.877 3.88e-05 ***
## jelly_biomass 3.223e-05  7.279e-06   4.427 0.000132 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.5023986)
## 
##     Null deviance: 46.485  on 29  degrees of freedom
## Residual deviance: 21.186  on 28  degrees of freedom
## AIC: 383.12
## 
## Number of Fisher Scoring iterations: 6

plot(Field_DA_model3)

Looks like NB QQ plot looks best

Full Model

check for multicollinearity

DAData1 = subset(Field_DA, select = c(Fmean,pHmean,Smean,Tmean,Omean,jelly_biomass,copepod_density) )
pairs(DAData1 , panel = panel.smooth)

salinity and temperature have strong relationship

full model

DA1<-glm.nb(copepod_density_round~ jelly_biomass+Tmean + Fmean + Sample.Year, data=Field_DA, na.action="na.fail")
summary(DA1)

## 
## Call:
## glm.nb(formula = copepod_density_round ~ jelly_biomass + Tmean + 
##     Fmean + Sample.Year, data = Field_DA, na.action = "na.fail", 
##     init.theta = 1.511147141, link = log)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     13.9099639  4.9891091   2.788 0.005302 ** 
## jelly_biomass   -0.0016896  0.0004873  -3.467 0.000526 ***
## Tmean           -0.5541199  0.3625481  -1.528 0.126412    
## Fmean           -0.0583690  0.0589615  -0.990 0.322198    
## Sample.Year2021  0.5228110  0.4020302   1.300 0.193455    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.5111) family taken to be 1)
## 
##     Null deviance: 69.053  on 29  degrees of freedom
## Residual deviance: 33.252  on 25  degrees of freedom
## AIC: 391.14
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.511 
##           Std. Err.:  0.364 
## Warning while fitting theta: alternation limit reached 
## 
##  2 x log-likelihood:  -379.143

par(mfrow = c(2, 2))
plot(DA1)

AICc(DA1)

## [1] 394.7956

allperms <- dredge(DA1)
mod.table1 <- data.frame(allperms)
mod.table1

##    X.Intercept.       Fmean jelly_biomass Sample.Year      Tmean df    logLik
## 4      6.756106 -0.13051334  -0.001798873        <NA>         NA  4 -191.2252
## 15    16.948053          NA  -0.001714860           + -0.7972991  5 -190.1204
## 11    17.773118          NA  -0.001582821        <NA> -0.8170713  4 -191.6196
## 12    13.229083 -0.08210967  -0.001592250        <NA> -0.4688598  5 -190.4033
## 8      6.391163 -0.12065700  -0.001893363           +         NA  5 -190.7376
## 16    13.909805 -0.05836626  -0.001689436           + -0.5541143  6 -189.5717
## 3      5.985388          NA  -0.002174485        <NA>         NA  3 -195.0896
## 7      5.493106          NA  -0.002282685           +         NA  4 -194.0407
## 9     21.949045          NA            NA        <NA> -1.1160225  3 -196.2059
## 10    17.466513 -0.07398910            NA        <NA> -0.7761812  4 -195.2736
## 13    21.225506          NA            NA           + -1.0926943  4 -195.6296
## 2      6.719369 -0.15608235            NA        <NA>         NA  3 -197.2278
## 14    17.763562 -0.06188874            NA           + -0.8183373  5 -195.0544
## 6      6.598690 -0.15327516            NA           +         NA  4 -197.1869
## 1      5.757218          NA            NA        <NA>         NA  2 -202.0601
## 5      5.424320          NA            NA           +         NA  3 -201.7262
##        AICc      delta       weight
## 4  392.0503  0.0000000 2.656570e-01
## 15 392.7408  0.6904930 1.880973e-01
## 11 392.8392  0.7888693 1.790690e-01
## 12 393.3066  1.2563167 1.417475e-01
## 8  393.9752  1.9249205 1.014682e-01
## 16 394.7956  2.7452510 6.732828e-02
## 3  397.1023  5.0519319 2.124752e-02
## 7  397.6815  5.6311479 1.590498e-02
## 9  399.3350  7.2846309 6.957989e-03
## 10 400.1472  8.0968490 4.635673e-03
## 13 400.8591  8.8087879 3.247261e-03
## 2  401.3787  9.3283307 2.504378e-03
## 14 402.6087 10.5584068 1.353918e-03
## 6  403.9738 11.9234666 6.841847e-04
## 1  408.5646 16.5142485 6.891246e-05
## 5  410.3755 18.3251351 2.786562e-05

best model includes only fluorescence and jelly biomass

Final model

DA_final<-glm.nb(copepod_density_round~ Fmean+jelly_biomass, data=Field_DA, na.action="na.fail")
summary(DA_final)

## 
## Call:
## glm.nb(formula = copepod_density_round ~ Fmean + jelly_biomass, 
##     data = Field_DA, na.action = "na.fail", init.theta = 1.375328852, 
##     link = log)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    6.7561063  0.3632559  18.599  < 2e-16 ***
## Fmean         -0.1305133  0.0453898  -2.875 0.004035 ** 
## jelly_biomass -0.0017989  0.0004934  -3.646 0.000267 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.3753) family taken to be 1)
## 
##     Null deviance: 62.928  on 29  degrees of freedom
## Residual deviance: 33.491  on 27  degrees of freedom
## AIC: 390.45
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.375 
##           Std. Err.:  0.328 
## 
##  2 x log-likelihood:  -382.450

plot(DA_final)

AICc(DA_final)

## [1] 392.0503

#mcfadden's rsquared
with(summary(DA_final), 1 - deviance/null.deviance)

## [1] 0.4677958

extract rate of change estimates with confidence intervals

exp(coef(DA_final))

##   (Intercept)         Fmean jelly_biomass 
##   859.2898337     0.8776448     0.9982027

exp(confint(DA_final, level=.95))

##                     2.5 %       97.5 %
## (Intercept)   456.9979807 1735.7605222
## Fmean           0.8092034    0.9559715
## jelly_biomass   0.9974997    0.9990376

#get rate of change (1-exp(coefficient))
1-0.9982027

## [1] 0.0017973

Calculate rate of change another way (to verify)

a<-exp(-0.0017989)
a

## [1] 0.9982027

b<-a-1
c<-b*100
c

## [1] -0.1797283

ANOVA test for type II variance

ANOVADA<-car::Anova(DA_final, type = 2)
summary(ANOVADA)

##     LR Chisq            Df      Pr(>Chisq)       
##  Min.   : 8.612   Min.   :1   Min.   :0.0001621  
##  1st Qu.:10.016   1st Qu.:1   1st Qu.:0.0009562  
##  Median :11.420   Median :1   Median :0.0017504  
##  Mean   :11.420   Mean   :1   Mean   :0.0017504  
##  3rd Qu.:12.823   3rd Qu.:1   3rd Qu.:0.0025446  
##  Max.   :14.227   Max.   :1   Max.   :0.0033388

ANOVADA

## Analysis of Deviance Table (Type II tests)
## 
## Response: copepod_density_round
##               LR Chisq Df Pr(>Chisq)    
## Fmean           8.6124  1  0.0033388 ** 
## jelly_biomass  14.2267  1  0.0001621 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Predict values for jelly density vs. copepod densty plot

predict1<-ggpredict(
  DA_final,
  terms=c("jelly_biomass"),
  ci.lvl = 0.95,
  type = "fe",
  typical = "mean",
  condition = NULL,
  back.transform = TRUE,
  ppd = FALSE,
  vcov.fun = NULL,
  vcov.type = NULL,
  vcov.args = NULL,
  interval = "confidence")

Medium Calanoids

subset

Field_Cal<-subset(Field_SCQM, Species_lifestage_combined == "CALANOIDA_Medium")
#remove july station
Field_Cal<-subset(Field_Cal, Station_unique != "SC5a")

add up multiple lines per station

colnames(Field_Cal)

##  [1] "BugSampleID"                "Project"                   
##  [3] "Sample.Code"                "Sampling.Group"            
##  [5] "Station"                    "Site"                      
##  [7] "Site.Name"                  "Basin"                     
##  [9] "Sub.Basin"                  "Latitude"                  
## [11] "Longitude"                  "Sample.Date"               
## [13] "Sample.Year"                "Sample.Month"              
## [15] "Sample.Time"                "Tow.Type"                  
## [17] "Mesh.Size"                  "Station.Depth..m."         
## [19] "Flow.meter..revs."          "Broad.Group"               
## [21] "Mid.Level.Group"            "X1st.Word.Taxa"            
## [23] "Genus.species"              "Life.History.Stage"        
## [25] "Total.Ct"                   "Density....m3."            
## [27] "Vol.Filtered..m3."          "Jelly.Mass..g."            
## [29] "Number.of.Jellies"          "Trial.Time"                
## [31] "Trial.Type"                 "Jelly.Size"                
## [33] "Jelly.Density....m3."       "Jelly.Density..g.m3."      
## [35] "Location"                   "Species_lifestage"         
## [37] "Species_lifestage_combined" "Station_unique"            
## [39] "Jelly.Density"

Field_Cal <- Field_Cal %>%
  group_by(Sample.Code,Station_unique,Sample.Year,Sample.Month,Site,Location) %>%
  summarise(
    copepod_density = sum(Density....m3.),
    jelly_biomass = mean(Jelly.Density))

Add environmental table to copepod table

Field_Cal$Station<-Field_Cal$Station_unique
Field_Cal <- merge(Field_Cal,Env3,by.x = "Station")

Examine Data

Histograms of Jellyfish Density and Copepod Density

hist(Field_Cal$copepod_density)

hist(Field_Cal$jelly_biomass)

only jelly biomass has abnormal distribution

##investigate year to see if it is important

Field_Cal$Sample.Year<-as.factor(Field_Cal$Sample.Year)
CalYear<-glm(copepod_density~Sample.Year,data=Field_Cal,family = Gamma(link="log"))
summary(CalYear)

## 
## Call:
## glm(formula = copepod_density ~ Sample.Year, family = Gamma(link = "log"), 
##     data = Field_Cal)
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       6.5821     0.2394  27.495  < 2e-16 ***
## Sample.Year2021   0.7610     0.2734   2.783  0.00953 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.401164)
## 
##     Null deviance: 13.782  on 29  degrees of freedom
## Residual deviance: 11.093  on 28  degrees of freedom
## AIC: 481.97
## 
## Number of Fisher Scoring iterations: 5

plot(copepod_density~Sample.Year,data=Field_Cal)

Year is significant - potentially include in model

Plot Jellyfish Density against zooplankton density

ggplot(Field_Cal, aes(x=jelly_biomass, y=copepod_density)) + geom_point()

Family Options

Negative Binomial Distribution

#round to integers
Field_Cal$copepod_density_round<-round(as.numeric(Field_Cal$copepod_density), 0)
#run model
Field_Cal_model1 <- glm.nb(copepod_density_round~jelly_biomass, data = Field_Cal)
summary(Field_Cal_model1)

## 
## Call:
## glm.nb(formula = copepod_density_round ~ jelly_biomass, data = Field_Cal, 
##     init.theta = 2.643917945, link = log)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    7.3428565  0.1357612  54.087   <2e-16 ***
## jelly_biomass -0.0007212  0.0003428  -2.104   0.0354 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(2.6439) family taken to be 1)
## 
##     Null deviance: 36.292  on 29  degrees of freedom
## Residual deviance: 31.882  on 28  degrees of freedom
## AIC: 484.69
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  2.644 
##           Std. Err.:  0.647 
## 
##  2 x log-likelihood:  -478.688

plot(Field_Cal_model1)

### Gamma distribution

Field_Cal_model3<-glm(copepod_density~jelly_biomass, data= Field_Cal, family="Gamma")
summary(Field_Cal_model3)

## 
## Call:
## glm(formula = copepod_density ~ jelly_biomass, family = "Gamma", 
##     data = Field_Cal)
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.242e-04  7.461e-05   8.366 4.22e-09 ***
## jelly_biomass 7.580e-07  3.205e-07   2.365   0.0252 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.2560637)
## 
##     Null deviance: 13.782  on 29  degrees of freedom
## Residual deviance: 11.908  on 28  degrees of freedom
## AIC: 484.23
## 
## Number of Fisher Scoring iterations: 5

plot(Field_Cal_model3)

These are about the same, go with NB

Full Model

check for multicollinearity

Field_Cal1 = subset(Field_Cal, select = c(Fmean,pHmean,Smean,Tmean,Omean,jelly_biomass,copepod_density) )
pairs(Field_Cal1 , panel = panel.smooth)

fluorescence and salinity are correlated, exclude salinity

full model

Cal1<-glm.nb(copepod_density_round~ jelly_biomass+Tmean + Fmean + Sample.Year, data=Field_Cal, na.action="na.fail")
summary(Cal1)

## 
## Call:
## glm.nb(formula = copepod_density_round ~ jelly_biomass + Tmean + 
##     Fmean + Sample.Year, data = Field_Cal, na.action = "na.fail", 
##     init.theta = 3.63222859, link = log)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     10.4256523  3.2124752   3.245 0.001173 ** 
## jelly_biomass   -0.0006533  0.0003096  -2.110 0.034837 *  
## Tmean           -0.2677631  0.2334407  -1.147 0.251370    
## Fmean            0.0109781  0.0379493   0.289 0.772365    
## Sample.Year2021  0.9155822  0.2584031   3.543 0.000395 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.6322) family taken to be 1)
## 
##     Null deviance: 49.790  on 29  degrees of freedom
## Residual deviance: 31.401  on 25  degrees of freedom
## AIC: 480.21
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.632 
##           Std. Err.:  0.903 
## 
##  2 x log-likelihood:  -468.210

par(mfrow = c(2, 2))
plot(Cal1)

AICc(Cal1)

## [1] 483.8626

qq plot looks good

Make model comparison table

allperms <- dredge(Cal1)
mod.table2 <- data.frame(allperms)
mod.table2

##    X.Intercept.        Fmean jelly_biomass Sample.Year       Tmean df    logLik
## 7      6.689247           NA -0.0007878746           +          NA  4 -234.8619
## 15     9.810444           NA -0.0006522665           + -0.21882447  5 -234.1497
## 8      6.810121 -0.016363531 -0.0007414513           +          NA  5 -234.6938
## 13    11.436548           NA            NA           + -0.33812659  4 -236.2037
## 5      6.582223           NA            NA           +          NA  3 -237.9470
## 16    10.425590  0.010979081 -0.0006533204           + -0.26775978  6 -234.1052
## 6      6.822730 -0.031375103            NA           +          NA  4 -237.3420
## 14    11.989432  0.009657259            NA           + -0.38197316  5 -236.1703
## 3      7.342857           NA -0.0007212024        <NA>          NA  3 -239.3438
## 1      7.210375           NA            NA        <NA>          NA  2 -241.4159
## 4      7.513210 -0.025610856 -0.0006582708        <NA>          NA  4 -239.0297
## 11     8.321136           NA -0.0006774798        <NA> -0.06723145  4 -239.2958
## 2      7.474948 -0.037281875            NA        <NA>          NA  3 -240.7373
## 9     10.265098           NA            NA        <NA> -0.20821840  3 -240.9165
## 12     6.803637 -0.030211501 -0.0006799857        <NA>  0.05086844  5 -239.0114
## 10     8.807967 -0.028047495            NA        <NA> -0.09533199  4 -240.6711
##        AICc     delta      weight
## 7  479.3238  0.000000 0.391001642
## 15 480.7993  1.475534 0.186969379
## 8  481.8875  2.563735 0.108510224
## 13 482.0074  2.683541 0.102200963
## 5  482.8171  3.493336 0.068172656
## 16 483.8626  4.538798 0.040419508
## 6  484.2841  4.960277 0.032739204
## 14 484.8406  5.516830 0.024786439
## 3  485.6107  6.286871 0.016865602
## 1  487.2763  7.952525 0.007333474
## 4  487.6594  8.335578 0.006055233
## 11 488.1915  8.867712 0.004640656
## 2  488.3977  9.073892 0.004186085
## 9  488.7560  9.432180 0.003499506
## 12 490.5229 11.199077 0.001446538
## 10 490.9423 11.618480 0.001172891

jelly biomass and sample year are the most important

Final model

Cal_final<-glm.nb(copepod_density_round~ jelly_biomass+Sample.Year, data=Field_Cal, na.action="na.fail")
summary(Cal_final)

## 
## Call:
## glm.nb(formula = copepod_density_round ~ jelly_biomass + Sample.Year, 
##     data = Field_Cal, na.action = "na.fail", init.theta = 3.46768619, 
##     link = log)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      6.6892460  0.2123501  31.501  < 2e-16 ***
## jelly_biomass   -0.0007879  0.0002997  -2.629 0.008559 ** 
## Sample.Year2021  0.8070946  0.2324610   3.472 0.000517 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(3.4677) family taken to be 1)
## 
##     Null deviance: 47.545  on 29  degrees of freedom
## Residual deviance: 31.466  on 27  degrees of freedom
## AIC: 477.72
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  3.468 
##           Std. Err.:  0.860 
## 
##  2 x log-likelihood:  -469.724

plot(Cal_final)

AICc(Cal_final)

## [1] 479.3238

#mcfadden's rsquared
with(summary(Cal_final), 1 - deviance/null.deviance)

## [1] 0.3381865

extract estimates for tables, betas, variance-covariance matrix

exp(coef(Cal_final))

##     (Intercept)   jelly_biomass Sample.Year2021 
##     803.7160057       0.9992124       2.2413864

exp(confint(Cal_final, level=.95))

##                       2.5 %       97.5 %
## (Intercept)     546.9442580 1242.6299326
## jelly_biomass     0.9987005    0.9997918
## Sample.Year2021   1.3903831    3.4843010

1-0.9992124

## [1] 0.0007876

ANOVA test for type II variance

ANOVADA<-car::Anova(Cal_final, type = 2)
summary(ANOVADA)

##     LR Chisq            Df      Pr(>Chisq)      
##  Min.   : 6.787   Min.   :1   Min.   :0.001331  
##  1st Qu.: 7.665   1st Qu.:1   1st Qu.:0.003294  
##  Median : 8.543   Median :1   Median :0.005257  
##  Mean   : 8.543   Mean   :1   Mean   :0.005257  
##  3rd Qu.: 9.421   3rd Qu.:1   3rd Qu.:0.007221  
##  Max.   :10.299   Max.   :1   Max.   :0.009184

ANOVADA

## Analysis of Deviance Table (Type II tests)
## 
## Response: copepod_density_round
##               LR Chisq Df Pr(>Chisq)   
## jelly_biomass   6.7868  1   0.009184 **
## Sample.Year    10.2988  1   0.001331 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Predict values for jelly density vs. copepod densty plot

predict2<-ggpredict(
  Cal_final,
  terms=c("jelly_biomass","Sample.Year"),
  ci.lvl = 0.95,
  type = "fe",
  typical = "mean",
  condition = NULL,
  back.transform = TRUE,
  ppd = FALSE,
  vcov.fun = NULL,
  vcov.type = NULL,
  vcov.args = NULL,
  interval = "confidence")

Make Plots

DitrichocorycaeusPlot<-plot(predict1,add.data = TRUE,dot.size=1.5,dot.alpha=0.65)+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic() +
  scale_x_continuous(breaks=seq(0,1400,350),expand = c(0, 0), limits = c(-10, 1400)) + 
  scale_y_continuous(breaks=seq(0,1200,400),expand = c(0, 0), limits = c(-10, 1200))+
  geom_line(size=1.5) +
  geom_ribbon( aes(ymin = conf.low, ymax = conf.high), alpha = .15)+ xlab(expression(italic('A. labiata')~ plain('Biomass ( g /' ~m^3~ ')')))+ 
  ylab(expression(italic('D. anglicus') ~ plain('Density') ~ plain('( # /' ~ m^3 ~ ')')))+
  theme(axis.text=element_text(size=15,colour="black"),
        legend.position=c(0.87, 0.8),
        legend.text=element_text(size=15,colour="black"),
        legend.title=element_text(size=15,colour="black"),
        axis.title=element_text(size=20,colour="black"),
        axis.line = element_line(colour = "black"))+theme(plot.title = element_blank())+
  theme(plot.margin=margin(0.7, 0.7, 0.7, 0.7, unit = "cm"))+
  annotate("text",x=1300, y=1150, label= "a",size=10)
DitrichocorycaeusPlot

CalanoidPlot<-plot(predict2,add.data = TRUE,dot.size=1.5,dot.alpha=0.65)+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic() +
  scale_x_continuous(breaks=seq(0,1400,350),expand = c(0,0), limits = c(-10, 1400)) + 
  scale_y_continuous(breaks=seq(0,3000,500),expand = c(0,0), limits = c(-10, 3000))+
  geom_line(size=1.5) +
  geom_ribbon( aes(ymin = conf.low, ymax = conf.high), alpha = .15)+ xlab(expression(italic('A. labiata')~ plain('Biomass ( g /' ~m^3~ ')')))+ 
  ylab(expression(plain('Medium Calanoid Density') ~ plain('( # /' ~ m^3 ~ ')')))+
  theme(axis.text=element_text(size=15,colour="black"),
        legend.position=c(0.83, 0.7),
        legend.text=element_text(size=15,colour="black"),
        legend.title=element_text(size=15,colour="black"),
        axis.title=element_text(size=20,colour="black"),
        axis.line = element_line(colour = "black"))+theme(plot.title = element_blank())+
  theme(plot.margin=margin(0.7, 0.7, 0.7, 0.7, unit = "cm"))+labs(col="Year",title=NULL)+
  annotate("text",x=1300, y=2800, label= "b",size=10)
CalanoidPlot

Final Plot

Field_GLM<-ggarrange(DitrichocorycaeusPlot,CalanoidPlot,
          ncol = 2, nrow = 1)
Field_GLM

save plot

setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Field_GLM.png", plot = Field_GLM, height = 164, width = 328, units="mm", device='png', dpi=600)
ggsave(filename = "Field_GLM.tif", plot = Field_GLM, height = 164, width = 328, units="mm", device='tiff', dpi=600)

Model Comparison Table

combine tables

mod.table1$Taxon<-"Ditrichocorycaeus anglicus"
mod.table2$Taxon<-"Medium Calanoid"

Field_model_table<-rbind(mod.table1,mod.table2)

remove excess columns and rename columns

colnames(Field_model_table)

##  [1] "X.Intercept."  "Fmean"         "jelly_biomass" "Sample.Year"  
##  [5] "Tmean"         "df"            "logLik"        "AICc"         
##  [9] "delta"         "weight"        "Taxon"

Field_model_table <- Field_model_table[ -c(1,7,8) ]
colnames(Field_model_table)

## [1] "Fmean"         "jelly_biomass" "Sample.Year"   "Tmean"        
## [5] "df"            "delta"         "weight"        "Taxon"

#reorder columns
Field_model_table <- Field_model_table[, c(8,2,1,3,4,5,6,7)]

Field_model_table <- Field_model_table %>% 
                    rename("Jellyfish Biomass" = "jelly_biomass",
                           "Chla" = "Fmean",
                           "Year" = "Sample.Year",
                           "Temperature" = "Tmean",
                           "Aikake Weight" = "weight")

save model list

write.csv(Field_model_table,"/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output/Field_model_table.csv", row.names = FALSE)

17_field_GLM

2023-08-29

Data Prep

subset database to field stations

Combine taxa

Environmental Data

Calculate means per station

salinity

Temperature

Oxygen

pH

fluorescence

combine dataframes

Ditrichocorycaeus anglicus

Add environmental table to copepod table

Examine Data

Family Options

Negative Binomial Distribution

Gaussian distribution

Gamma distribution

Full Model

Final model

Medium Calanoids

Add environmental table to copepod table

Examine Data

Family Options

Negative Binomial Distribution

Full Model

Final model

Make Plots

Final Plot

Model Comparison Table