This code makes and compares multiple generalized linear models of the experiment data with jellyfish bimass vs. zooplankton density.
A best fit model is chosen, and the results are visualized.
Link to report here: https://rpubs.com/HailaSchultz/exp_GLM-AllZoop

load packages

library(MASS)
library(ggplot2)
library(ggeffects)
library(dplyr)

Set up data

Read database into R. If current database changes, just change the path

Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")

Add control tanks

First, read in the file that designates different samples to tanks with jellies other, tanks with zero jellies sampled at the beginning of the experiment remove, and tanks with zero jellies sampled at the end of the experiment zero

Control_tanks <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Control_Tanks.csv")

Add control tank data to the database file

Database$Control_info <- Control_tanks$c1[match(Database$Sample.Code, Control_tanks$Sample.Code)]

Subset data

Subset to trials with large jellies where the number of jellies in the tanks was manipulated

Exp_numb_trials<- subset(Database, Trial.Type=='Number')
Exp_numb_large<-subset(Exp_numb_trials,Jelly.Size=="Large")

remove aug 22 2019 - the jellies we used for this experiment were younger/smaller than the other experiments, so I decided to omit it from analysis

Exp_sub<-subset(Exp_numb_large,Sample.Date!="08/22/2019")

remove tanks sampled at the beginning of experiment - we decided not to use these tanks because they were different than the tanks with zero jellies sampled at the end of the experiment and didn’t add anything beneficial

Exp_sub<-subset(Exp_sub,Control_info!="remove")

Make database more manageable for analysis

#convert sample year to a factor
Exp_sub$Sample.Year<- as.factor(Exp_sub$Sample.Year)
#Rename Vars (get the dots out)
Exp_sub$Jelly.Mass<-Exp_sub$Jelly.Mass..g.
Exp_sub$Density<-Exp_sub$Density....m3.
#calculate jelly density
Exp_sub$Jelly.Density<-Exp_sub$Jelly.Mass/Exp_sub$Vol.Filtered..m3.
#add 1 to jelly  density for analyses that prohibit zeroes
Exp_sub$Jelly.Density <-Exp_sub$Jelly.Density
colnames(Exp_sub)

##  [1] "BugSampleID"          "Project"              "Sample.Code"         
##  [4] "Sampling.Group"       "Station"              "Site"                
##  [7] "Site.Name"            "Basin"                "Sub.Basin"           
## [10] "Latitude"             "Longitude"            "Sample.Date"         
## [13] "Sample.Year"          "Sample.Month"         "Sample.Time"         
## [16] "Tow.Type"             "Mesh.Size"            "Station.Depth..m."   
## [19] "Flow.meter..revs."    "Broad.Group"          "Mid.Level.Group"     
## [22] "X1st.Word.Taxa"       "Genus.species"        "Life.History.Stage"  
## [25] "Total.Ct"             "Density....m3."       "Vol.Filtered..m3."   
## [28] "Jelly.Mass..g."       "Number.of.Jellies"    "Trial.Time"          
## [31] "Trial.Type"           "Jelly.Size"           "Jelly.Density....m3."
## [34] "Jelly.Density..g.m3." "Location"             "Control_info"        
## [37] "Jelly.Mass"           "Density"              "Jelly.Density"

Model: All Zooplankton

This code summarizes all zooplankton taxa for a total zooplankton metric.

Prep data

Remove Centric Diatoms

Exp_sub<-subset(Exp_sub,Genus.species!="Diatom-Centric")

Subset data:The mean of jelly density is taken because this should be the same number for all taxa in each tank.

AllZoop <- Exp_sub %>%
  group_by(Sample.Code, Station,Sample.Date,Sample.Year,Control_info) %>%
  summarise(
    ZoopDensity = sum(Density),
    Jelly.Density = mean(Jelly.Density))

Calculate the geometric mean of the zero-jelly tanks to use as a control.

AllZoop$Control_info_combined <- paste(AllZoop$Sample.Date, AllZoop$Control_info)
AllZoop2<- AllZoop %>%
  group_by(Control_info,Control_info_combined,Sample.Date) %>%
  summarise(
    ave = exp(mean(log(ZoopDensity))))
AllZoop3<-subset(AllZoop2,Control_info=="zero")
# match the geometric mean of zero jelly tanks to each experiment
AllZoop$Control<- AllZoop3$ave[match(AllZoop$Sample.Date, AllZoop3$Sample.Date)]

Examine Data

Histograms of Jellyfish Density and Zooplankton Density

hist(AllZoop$ZoopDensity)

hist(AllZoop$Jelly.Density)

See if years are different

boxplot(ZoopDensity~Sample.Year, data=AllZoop)

Looks like 2019 and 2020 may be different.

Plot Jellyfish Density against zooplankton density

ggplot(AllZoop, aes(x=Jelly.Density, y=ZoopDensity)) + geom_point(aes(colour=Sample.Date))

At values of low Jellyfish density, zooplankton density has a wide spread. However, at high jellyfish densities, values are consistently low. The lowest valeus all belong to the same exeriment, likely indicating that the overall zooplankton abundance was low that day. This means that zooplankton stocking density must be incorporated into the analysis. August 13, 2019 also had very high stocking densities. This should be considered when interpereting analyses.

Simple model, Negative Binomial Distribution

#round to integers
AllZoop$ZoopDensity_round<-round(as.numeric(AllZoop$ZoopDensity), 0)
AllZoop$Control_round<-round(as.numeric(AllZoop$Control), 0)
#run model
Allzoop_model1 <- glm.nb(ZoopDensity_round~Jelly.Density, data = AllZoop)
summary(Allzoop_model1)

## 
## Call:
## glm.nb(formula = ZoopDensity_round ~ Jelly.Density, data = AllZoop, 
##     init.theta = 1.635194218, link = log)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    1.135e+01  1.320e-01  85.993  < 2e-16 ***
## Jelly.Density -1.797e-04  5.254e-05  -3.421 0.000624 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.6352) family taken to be 1)
## 
##     Null deviance: 85.379  on 64  degrees of freedom
## Residual deviance: 71.410  on 63  degrees of freedom
## AIC: 1563.5
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.635 
##           Std. Err.:  0.263 
## 
##  2 x log-likelihood:  -1557.524

plot(Allzoop_model1)

Simple model, Gaussian distribution

Allzoop_model2<-glm(ZoopDensity~Jelly.Density, data= AllZoop, family="gaussian")
summary(Allzoop_model2)

## 
## Call:
## glm(formula = ZoopDensity ~ Jelly.Density, family = "gaussian", 
##     data = AllZoop)
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   89219.744   8317.827   10.73 7.56e-16 ***
## Jelly.Density   -13.144      3.311   -3.97 0.000187 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 2428644725)
## 
##     Null deviance: 1.9128e+11  on 64  degrees of freedom
## Residual deviance: 1.5300e+11  on 63  degrees of freedom
## AIC: 1593.1
## 
## Number of Fisher Scoring iterations: 2

plot(Allzoop_model2)

# Simple model, Gamma distribution

Allzoop_model3<-glm(ZoopDensity~Jelly.Density, data= AllZoop, family="Gamma")
summary(Allzoop_model3)

## 
## Call:
## glm(formula = ZoopDensity ~ Jelly.Density, family = "Gamma", 
##     data = AllZoop)
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.010e-05  1.361e-06   7.416 3.75e-10 ***
## Jelly.Density 4.710e-09  1.094e-09   4.305 5.94e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.4778059)
## 
##     Null deviance: 52.216  on 64  degrees of freedom
## Residual deviance: 40.530  on 63  degrees of freedom
## AIC: 1558.5
## 
## Number of Fisher Scoring iterations: 6

plot(Allzoop_model3)

Gamma looks to be the best - move forward using Gamma.

Simple model plus log link: gamma distribution

Allzoop_model4<-glm(ZoopDensity~Jelly.Density, data= AllZoop, family="Gamma"(link="log"))
summary(Allzoop_model4)

## 
## Call:
## glm(formula = ZoopDensity ~ Jelly.Density, family = Gamma(link = "log"), 
##     data = AllZoop)
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.135e+01  1.203e-01  94.384  < 2e-16 ***
## Jelly.Density -1.797e-04  4.787e-05  -3.755 0.000381 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.5076544)
## 
##     Null deviance: 52.216  on 64  degrees of freedom
## Residual deviance: 43.673  on 63  degrees of freedom
## AIC: 1563.9
## 
## Number of Fisher Scoring iterations: 10

plot(Allzoop_model4)

It looks better without log link, but I will go with log link for now because the model can’t find valid coefficients with the offset but without log link.

Simple model plus log link and offset: gamma distribution

Allzoop_model3<-glm(ZoopDensity~Jelly.Density+offset(log(Control)), data= AllZoop, family="Gamma"(link="log"))
summary(Allzoop_model3)

## 
## Call:
## glm(formula = ZoopDensity ~ Jelly.Density + offset(log(Control)), 
##     family = Gamma(link = "log"), data = AllZoop)
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.902e-02  5.203e-02  -1.711    0.092 .  
## Jelly.Density -2.111e-04  2.071e-05 -10.194 5.89e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.09502428)
## 
##     Null deviance: 16.3541  on 64  degrees of freedom
## Residual deviance:  6.6213  on 63  degrees of freedom
## AIC: 1435.2
## 
## Number of Fisher Scoring iterations: 6

plot(Allzoop_model3)

# Simple model plus log link, year and offset: gamma distribution

Allzoop_model5<-glm(ZoopDensity~Sample.Year+Jelly.Density+offset(log(Control)), data= AllZoop, family="Gamma"(link="log"))
summary(Allzoop_model5)

## 
## Call:
## glm(formula = ZoopDensity ~ Sample.Year + Jelly.Density + offset(log(Control)), 
##     family = Gamma(link = "log"), data = AllZoop)
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -0.0273616  0.0735930  -0.372    0.711    
## Sample.Year2020 -0.0949337  0.0794754  -1.195    0.237    
## Jelly.Density   -0.0002171  0.0000214 -10.144 8.58e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.09725202)
## 
##     Null deviance: 16.3541  on 64  degrees of freedom
## Residual deviance:  6.4853  on 62  degrees of freedom
## AIC: 1435.8
## 
## Number of Fisher Scoring iterations: 6

plot(Allzoop_model5)

Year doesn’t add much

All Zooplankton Final Model

ZoopMod<-glm(ZoopDensity~Jelly.Density+offset(log(Control)), data= AllZoop, family="Gamma"(link="log"))
summary(ZoopMod)

## 
## Call:
## glm(formula = ZoopDensity ~ Jelly.Density + offset(log(Control)), 
##     family = Gamma(link = "log"), data = AllZoop)
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.902e-02  5.203e-02  -1.711    0.092 .  
## Jelly.Density -2.111e-04  2.071e-05 -10.194 5.89e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.09502428)
## 
##     Null deviance: 16.3541  on 64  degrees of freedom
## Residual deviance:  6.6213  on 63  degrees of freedom
## AIC: 1435.2
## 
## Number of Fisher Scoring iterations: 6

plot(ZoopMod)

It looks like points 40, 41 and 60 are the outliers

Use predict function to predict model values

predictzoop<-ggpredict(
  ZoopMod,
  terms=c("Jelly.Density"),
  ci.lvl = 0.95,
  type = "fe",
  typical = "mean",
  condition = NULL,
  back.transform = TRUE,
  ppd = FALSE,
  vcov.fun = NULL,
  vcov.type = NULL,
  vcov.args = NULL,
  interval = "confidence")

Plot with original values

ZoopPlot<-plot(predictzoop,add.data = TRUE)+xlab("Jelly Density (g/m3)")+ylab("Copepod Density (#/m3)")+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic() +
  geom_line(size=1.5) +
  geom_ribbon( aes(ymin = conf.low, ymax = conf.high), alpha = .15) +
  xlab("Aurelia Biomass (g/m3)")+ylab("Total Zooplankton (#/m3)")+
  theme(axis.text=element_text(size=15,colour="black"),
        legend.position=c(0.87, 0.8),
        legend.text=element_text(size=15,colour="black"),
        legend.title=element_text(size=15,colour="black"),
        axis.title=element_text(size=20,colour="black"),
        axis.line = element_line(colour = "black"))+theme(plot.title = element_blank())
ZoopPlot

save plot

setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Exp_GLM_AllZoops.png", plot = ZoopPlot, width = 10, height = 10, device='png', dpi=700)

02_exp_GLM-AllZoop

2023-07-10