This code makes and compares multiple generalized linear models of the experiment data with jellyfish bimass vs. medium calanoid copepod density (primarily Paracalanus sp. and Acartia sp.).
A best fit model is chosen, and the results are visualized.
Link to report here: https://rpubs.com/HailaSchultz/exp_GLM-MedCalanoids

load packages

library(MASS)
library(ggplot2)
library(ggeffects)
library(dplyr)
library(scales)
library(forcats)

Set up data

Read database into R. If current database changes, just change the path

Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")

Add control tanks

First, read in the file that designates different samples to tanks with jellies other, tanks with zero jellies sampled at the beginning of the experiment remove, and tanks with zero jellies sampled at the end of the experiment zero

Control_tanks <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Control_Tanks.csv")

Add control tank data to the database file

Database$Control_info <- Control_tanks$c1[match(Database$Sample.Code, Control_tanks$Sample.Code)]

Subset data

Subset to trials with large jellies where the number of jellies in the tanks was manipulated

Exp_numb_trials<- subset(Database, Trial.Type=='Number')
Exp_numb_large<-subset(Exp_numb_trials,Jelly.Size=="Large")

remove aug 22 2019 - the jellies we used for this experiment were younger/smaller than the other experiments, so I decided to omit it from analysis

Exp_sub<-subset(Exp_numb_large,Sample.Date!="08/22/2019")

remove tanks sampled at the beginning of experiment - we decided not to use these tanks because they were different than the tanks with zero jellies sampled at the end of the experiment and didn’t add anything beneficial

Exp_sub<-subset(Exp_sub,Control_info!="remove")

Prepare data

#convert sample year to a factor
Exp_sub$Sample.Year<- as.factor(Exp_sub$Sample.Year)
#Rename Vars (get the dots out)
Exp_sub$Jelly.Mass<-Exp_sub$Jelly.Mass..g.
Exp_sub$Density<-Exp_sub$Density....m3.
#calculate jelly density
Exp_sub$Jelly.Density<-Exp_sub$Jelly.Mass/Exp_sub$Vol.Filtered..m3.
#add 1 to jelly  density for analyses that prohibit zeroes
Exp_sub$Jelly.Density <-Exp_sub$Jelly.Density
colnames(Exp_sub)

##  [1] "BugSampleID"          "Project"              "Sample.Code"         
##  [4] "Sampling.Group"       "Station"              "Site"                
##  [7] "Site.Name"            "Basin"                "Sub.Basin"           
## [10] "Latitude"             "Longitude"            "Sample.Date"         
## [13] "Sample.Year"          "Sample.Month"         "Sample.Time"         
## [16] "Tow.Type"             "Mesh.Size"            "Station.Depth..m."   
## [19] "Flow.meter..revs."    "Broad.Group"          "Mid.Level.Group"     
## [22] "X1st.Word.Taxa"       "Genus.species"        "Life.History.Stage"  
## [25] "Total.Ct"             "Density....m3."       "Vol.Filtered..m3."   
## [28] "Jelly.Mass..g."       "Number.of.Jellies"    "Trial.Time"          
## [31] "Trial.Type"           "Jelly.Size"           "Jelly.Density....m3."
## [34] "Jelly.Density..g.m3." "Location"             "Control_info"        
## [37] "Jelly.Mass"           "Density"              "Jelly.Density"

Subset

Combine species and life history stage

Exp_sub$Species_lifestage <- paste(Exp_sub$Genus.species, Exp_sub$Life.History.Stage, sep="_")

Recode Acartia as medium calanoids

Exp_sub <- Exp_sub %>%
                   mutate(Species_lifestage_combined = fct_recode(Species_lifestage, 
                                     "CALANOIDA_Medium" = "ACARTIA_Copepodite", 
                                     "CALANOIDA_Medium" = "ACARTIA_Female, Adult"))

Subset data to medium calanoids and add multiple entries per tank:The mean of jelly density is taken because this should be the same number for all taxa in each tank.

MedCal<-subset(Exp_sub, Species_lifestage_combined == "CALANOIDA_Medium")
MedCal <- MedCal %>%
  group_by(Sample.Code, Station,Sample.Date,Sample.Year,Control_info) %>%
  summarise(
    CopDensity = sum(Density),
    Jelly.Density = mean(Jelly.Density))

Control geometric means

Calculate the geometric mean of the zero-jelly tanks to use as a control.

MedCal$Control_info_combined <- paste(MedCal$Sample.Date, MedCal$Control_info)
MedCal2<- MedCal %>%
  group_by(Control_info,Control_info_combined,Sample.Date) %>%
  summarise(
    ave = exp(mean(log(CopDensity))))
MedCal3<-subset(MedCal2,Control_info=="zero")
# match the geometric mean of zero jelly tanks to each experiment
MedCal$Control<- MedCal3$ave[match(MedCal$Sample.Date, MedCal3$Sample.Date)]

Examine Data

Histograms of Jellyfish Density and Copepod Density

hist(MedCal$CopDensity)

hist(MedCal$Jelly.Density)

Both appear to have a right-skewed a log distribution

See if years are different

boxplot(CopDensity~Sample.Year, data=MedCal)

The two years do look different - may need to include this in the model later on. There are some outliers from one experiment

Plot Jellyfish Density against zooplankton density

ggplot(MedCal, aes(x=Jelly.Density, y=CopDensity)) + geom_point(aes(colour=Sample.Date))

Family Options

Negative Binomial Distribution

#round to integers
MedCal$CopDensity_round<-round(as.numeric(MedCal$CopDensity), 0)
MedCal$Control_round<-round(as.numeric(MedCal$Control), 0)
#run model
MedCal_model1 <- glm.nb(CopDensity_round~Jelly.Density, data = MedCal)
summary(MedCal_model1)

## 
## Call:
## glm.nb(formula = CopDensity_round ~ Jelly.Density, data = MedCal, 
##     init.theta = 0.7801044884, link = log)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    9.846e+00  1.911e-01   51.52   <2e-16 ***
## Jelly.Density -1.521e-04  7.607e-05   -2.00   0.0455 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.7801) family taken to be 1)
## 
##     Null deviance: 82.016  on 64  degrees of freedom
## Residual deviance: 77.405  on 63  degrees of freedom
## AIC: 1379.4
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.780 
##           Std. Err.:  0.118 
## 
##  2 x log-likelihood:  -1373.439

plot(MedCal_model1)

## Gaussian distribution

MedCal_model2<-glm(CopDensity~Jelly.Density, data= MedCal, family="gaussian")
summary(MedCal_model2)

## 
## Call:
## glm(formula = CopDensity ~ Jelly.Density, family = "gaussian", 
##     data = MedCal)
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   19497.088   3688.811   5.285 1.67e-06 ***
## Jelly.Density    -2.490      1.468  -1.696   0.0949 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 477658034)
## 
##     Null deviance: 3.1466e+10  on 64  degrees of freedom
## Residual deviance: 3.0092e+10  on 63  degrees of freedom
## AIC: 1487.4
## 
## Number of Fisher Scoring iterations: 2

plot(MedCal_model2)

## Gamma distribution

MedCal_model3<-glm(CopDensity~Jelly.Density, data= MedCal, family="Gamma")
summary(MedCal_model3)

## 
## Call:
## glm(formula = CopDensity ~ Jelly.Density, family = "Gamma", data = MedCal)
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.759e-05  1.251e-05   3.804 0.000324 ***
## Jelly.Density 1.566e-08  8.720e-09   1.795 0.077396 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 1.885926)
## 
##     Null deviance: 105.149  on 64  degrees of freedom
## Residual deviance:  97.551  on 63  degrees of freedom
## AIC: 1379.4
## 
## Number of Fisher Scoring iterations: 7

plot(MedCal_model3)

looks about the same as NB, proceed with Gamma

Add other parameters

log link

MedCal_model4<-glm(CopDensity~Jelly.Density, data= MedCal, family="Gamma"(link="log"))
summary(MedCal_model4)

## 
## Call:
## glm(formula = CopDensity ~ Jelly.Density, family = Gamma(link = "log"), 
##     data = MedCal)
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    9.846e+00  2.304e-01  42.731   <2e-16 ***
## Jelly.Density -1.521e-04  9.172e-05  -1.659    0.102    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 1.863884)
## 
##     Null deviance: 105.15  on 64  degrees of freedom
## Residual deviance:  99.24  on 63  degrees of freedom
## AIC: 1380.7
## 
## Number of Fisher Scoring iterations: 6

plot(MedCal_model4)

about the same as the identity link. However, I have not been able to add a control factor with the identity link, so I will proceed with log link. ## log link and offset

MedCal_model3<-glm(CopDensity~Jelly.Density+offset(log(Control)), data= MedCal, family="Gamma"(link="log"))
summary(MedCal_model3)

## 
## Call:
## glm(formula = CopDensity ~ Jelly.Density + offset(log(Control)), 
##     family = Gamma(link = "log"), data = MedCal)
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.973e-01  6.903e-02  -2.859  0.00576 ** 
## Jelly.Density -1.404e-04  2.748e-05  -5.111 3.22e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.1672518)
## 
##     Null deviance: 16.898  on 64  degrees of freedom
## Residual deviance: 11.999  on 63  degrees of freedom
## AIC: 1229.7
## 
## Number of Fisher Scoring iterations: 8

plot(MedCal_model3)

Adding control improves the AIC and distribution a lot!!

log link, year and offset

MedCal_model5<-glm(CopDensity~Sample.Year+Jelly.Density+offset(log(Control)), data= MedCal, family="Gamma"(link="log"))
summary(MedCal_model5)

## 
## Call:
## glm(formula = CopDensity ~ Sample.Year + Jelly.Density + offset(log(Control)), 
##     family = Gamma(link = "log"), data = MedCal)
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -4.657e-02  9.712e-02  -0.479    0.633    
## Sample.Year2020 -2.287e-01  1.049e-01  -2.181    0.033 *  
## Jelly.Density   -1.580e-04  2.824e-05  -5.595 5.33e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.1693569)
## 
##     Null deviance: 16.898  on 64  degrees of freedom
## Residual deviance: 11.241  on 62  degrees of freedom
## AIC: 1227.4
## 
## Number of Fisher Scoring iterations: 7

plot(MedCal_model5)

I had included year in the past, but it actually doesn’t look like it adds much, so I will not include it moving forward. Although the years were significantly different, I realized that adding it is a little arbitrary, since the control parameter already accounts for starting density. Therefore, years with lower overall copepod abundance are already accounted for.

Medium Calanoid Final Model

MedCalMod<-glm(CopDensity~Jelly.Density+offset(log(Control)), data= MedCal, family="Gamma"(link="log"))
summary(MedCalMod)

## 
## Call:
## glm(formula = CopDensity ~ Jelly.Density + offset(log(Control)), 
##     family = Gamma(link = "log"), data = MedCal)
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.973e-01  6.903e-02  -2.859  0.00576 ** 
## Jelly.Density -1.404e-04  2.748e-05  -5.111 3.22e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.1672518)
## 
##     Null deviance: 16.898  on 64  degrees of freedom
## Residual deviance: 11.999  on 63  degrees of freedom
## AIC: 1229.7
## 
## Number of Fisher Scoring iterations: 8

plot(MedCalMod)

extract rate of change estimates with confidence intervals

exp(coef(MedCalMod))

##   (Intercept) Jelly.Density 
##     0.8209125     0.9998596

exp(confint(MedCalMod, level=.95))

##                   2.5 %    97.5 %
## (Intercept)   0.7234391 0.9358889
## Jelly.Density 0.9998125 0.9999085

#get rate of change (1-exp(coefficient))
1-0.9998596

## [1] 0.0001404

Calculate rate of change

a<-exp(-1.404e-04)
a

## [1] 0.9998596

b<-a-1
c<-b*100
c

## [1] -0.01403901

Use predict function to predict model values

predictmedcal<-ggpredict(
  MedCalMod,
  terms=c("Jelly.Density"),
  ci.lvl = 0.95,
  type = "fe",
  typical = "mean",
  condition = NULL,
  back.transform = TRUE,
  ppd = FALSE,
  vcov.fun = NULL,
  vcov.type = NULL,
  vcov.args = NULL,
  interval = "confidence")

Plot with original values

MedCalPlot<-plot(predictmedcal,add.data = TRUE,dot.size=1.5,dot.alpha=0.65)+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic() +
  geom_line(size=1.5) +
  geom_ribbon( aes(ymin = conf.low, ymax = conf.high), alpha = .15) +
  scale_x_continuous(breaks=seq(0,7500,1500),expand = c(0, 20), limits = c(-10, 7500)) + 
  scale_y_continuous(breaks=seq(0,125000,25000),expand = c(0, 20), limits = c(-10, 125000))+
  xlab(bquote('Jellyfish Biomass ( g /' ~m^3~ ')'))+ylab(bquote('Med. Calanoid Density ( # /'~m^3~')'))+
  theme(axis.text=element_text(size=15,colour="black"),
        legend.position=c(0.87, 0.8),
        legend.text=element_text(size=15,colour="black"),
        legend.title=element_text(size=15,colour="black"),
        axis.title=element_text(size=15,colour="black"),
        axis.line = element_line(colour = "black"))+theme(plot.title = element_blank())+
  theme(plot.margin=margin(0.7, 0.7, 0.7, 0.7, unit = "cm"))
MedCalPlot

save plot

setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Exp_GLM_MedCal.png", plot = MedCalPlot, width = 6, height = 5, device='png', dpi=700)

Plot without original values

MedCalPlot_nodata<-plot(predictmedcal)+xlab('Jellyfish Biomass ( g /' ~m^3~ ')')+
  ylab(bquote('Copepod Density ( individuals /'~m^3~')'))+ 
  theme(axis.line = element_line(colour = "black"))+theme_classic() +
  geom_line(size=1) +
  geom_ribbon( aes(ymin = conf.low, ymax = conf.high, color = NULL),
               alpha = .15,show.legend=FALSE) +
  scale_x_continuous(breaks=seq(0,7500,1500),expand = c(0, 20), limits = c(-10, 7500),labels=label_comma()) + 
  scale_y_continuous(breaks=seq(0,25000,5000),expand = c(0, 20), limits = c(-10, 25000),labels=label_comma())+
  theme(axis.text=element_text(size=8,colour="black"),
        axis.title=element_text(size=10),
        plot.title=element_blank())+
  theme(legend.position='none')+
  theme(plot.margin=margin(0.5, 0.5, 0.5, 0.5, unit = "cm"))
MedCalPlot_nodata