• This code makes and compares multiple generalized linear models of the experiment data with jellyfish bimass vs. larval crab density.
  • A best fit model is chosen, and the results are visualized.
  • Link to report here: https://rpubs.com/HailaSchultz/exp_GLM-crabs

load packages

library(MASS)
library(ggplot2)
library(ggeffects)
library(dplyr)
library(scales)

Set up data

Read database into R. If current database changes, just change the path

Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")

Add control tanks

First, read in the file that designates different samples to tanks with jellies other, tanks with zero jellies sampled at the beginning of the experiment remove, and tanks with zero jellies sampled at the end of the experiment zero

Control_tanks <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Control_Tanks.csv")

Add control tank data to the database file

Database$Control_info <- Control_tanks$c1[match(Database$Sample.Code, Control_tanks$Sample.Code)]

Subset data

Subset to trials with large jellies where the number of jellies in the tanks was manipulated

Exp_numb_trials<- subset(Database, Trial.Type=='Number')
Exp_numb_large<-subset(Exp_numb_trials,Jelly.Size=="Large")

remove aug 22 2019 - the jellies we used for this experiment were younger/smaller than the other experiments, so I decided to omit it from analysis

Exp_sub<-subset(Exp_numb_large,Sample.Date!="08/22/2019")

remove tanks sampled at the beginning of experiment - we decided not to use these tanks because they were different than the tanks with zero jellies sampled at the end of the experiment and didn’t add anything beneficial

Exp_sub<-subset(Exp_sub,Control_info!="remove")

Prepare data

#convert sample year to a factor
Exp_sub$Sample.Year<- as.factor(Exp_sub$Sample.Year)
#Rename Vars (get the dots out)
Exp_sub$Jelly.Mass<-Exp_sub$Jelly.Mass..g.
Exp_sub$Density<-Exp_sub$Density....m3.
#calculate jelly density
Exp_sub$Jelly.Density<-Exp_sub$Jelly.Mass/Exp_sub$Vol.Filtered..m3.
#add 1 to jelly  density for analyses that prohibit zeroes
Exp_sub$Jelly.Density <-Exp_sub$Jelly.Density
colnames(Exp_sub)
##  [1] "BugSampleID"          "Project"              "Sample.Code"         
##  [4] "Sampling.Group"       "Station"              "Site"                
##  [7] "Site.Name"            "Basin"                "Sub.Basin"           
## [10] "Latitude"             "Longitude"            "Sample.Date"         
## [13] "Sample.Year"          "Sample.Month"         "Sample.Time"         
## [16] "Tow.Type"             "Mesh.Size"            "Station.Depth..m."   
## [19] "Flow.meter..revs."    "Broad.Group"          "Mid.Level.Group"     
## [22] "X1st.Word.Taxa"       "Genus.species"        "Life.History.Stage"  
## [25] "Total.Ct"             "Density....m3."       "Vol.Filtered..m3."   
## [28] "Jelly.Mass..g."       "Number.of.Jellies"    "Trial.Time"          
## [31] "Trial.Type"           "Jelly.Size"           "Jelly.Density....m3."
## [34] "Jelly.Density..g.m3." "Location"             "Control_info"        
## [37] "Jelly.Mass"           "Density"              "Jelly.Density"

Subset data to crabs and add multiple entries per tank:The mean of jelly density is taken because this should be the same number for all taxa in each tank.

crabs<-subset(Exp_sub, Genus.species == "CRABS")
crabs <- crabs %>%
  group_by(Sample.Code, Station,Sample.Date,Sample.Year,Control_info) %>%
  summarise(
    CrabDensity = sum(Density),
    Jelly.Density = mean(Jelly.Density))

Control geometric means

Calculate the geometric mean of the zero-jelly tanks to use as a control.

crabs$Control_info_combined <- paste(crabs$Sample.Date, crabs$Control_info)
crabs2<- crabs %>%
  group_by(Control_info,Control_info_combined,Sample.Date) %>%
  summarise(
    ave = exp(mean(log(CrabDensity))))
crabs3<-subset(crabs2,Control_info=="zero")
# match the geometric mean of zero jelly tanks to each experiment
crabs$Control<- crabs3$ave[match(crabs$Sample.Date, crabs3$Sample.Date)]

Examine Data

Histograms of Jellyfish Density and Crabepod Density

hist(crabs$CrabDensity) 

hist(crabs$Jelly.Density) 

log distribution for both

See if years are different

boxplot(CrabDensity~Sample.Year, data=crabs)

Plot Jellyfish Density against crab density

ggplot(crabs, aes(x=Jelly.Density, y=CrabDensity)) + geom_point(aes(colour=Sample.Date))

Family Options

Negative Binomial Distribution

#round to integers
crabs$CrabDensity_round<-round(as.numeric(crabs$CrabDensity), 0)
crabs$Control_round<-round(as.numeric(crabs$Control), 0)
#run model
crabs_model1 <- glm.nb(CrabDensity_round~Jelly.Density, data = crabs)
summary(crabs_model1)
## 
## Call:
## glm.nb(formula = CrabDensity_round ~ Jelly.Density, data = crabs, 
##     init.theta = 1.312683768, link = log)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    5.224e+00  1.630e-01  32.053   <2e-16 ***
## Jelly.Density -1.598e-04  6.712e-05  -2.381   0.0173 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(1.3127) family taken to be 1)
## 
##     Null deviance: 61.491  on 48  degrees of freedom
## Residual deviance: 54.779  on 47  degrees of freedom
## AIC: 589.89
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  1.313 
##           Std. Err.:  0.244 
## 
##  2 x log-likelihood:  -583.885
plot(crabs_model1)

Gaussian distribution

crabs_model2<-glm(CrabDensity~Jelly.Density, data= crabs, family="gaussian")
summary(crabs_model2)
## 
## Call:
## glm(formula = CrabDensity ~ Jelly.Density, family = "gaussian", 
##     data = crabs)
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   193.823279  24.008812   8.073 1.98e-10 ***
## Jelly.Density  -0.026484   0.009855  -2.687  0.00993 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 16669.99)
## 
##     Null deviance: 903874  on 48  degrees of freedom
## Residual deviance: 783489  on 47  degrees of freedom
## AIC: 619.36
## 
## Number of Fisher Scoring iterations: 2
plot(crabs_model2)

Gamma distribution

crabs_model3<-glm(CrabDensity~Jelly.Density, data= crabs, family="Gamma")
summary(crabs_model3)
## 
## Call:
## glm(formula = CrabDensity ~ Jelly.Density, family = "Gamma", 
##     data = crabs)
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.730e-03  8.161e-04   5.796 5.48e-07 ***
## Jelly.Density 1.920e-06  6.724e-07   2.856  0.00637 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.6614244)
## 
##     Null deviance: 47.944  on 48  degrees of freedom
## Residual deviance: 40.653  on 47  degrees of freedom
## AIC: 587.28
## 
## Number of Fisher Scoring iterations: 6
plot(crabs_model3)

Add other parameters

crabs Final Model

CrabMod<-glm(CrabDensity~Jelly.Density+offset(log(Control)), data= crabs, family="Gamma"(link="log"))
summary(CrabMod)
## 
## Call:
## glm(formula = CrabDensity ~ Jelly.Density + offset(log(Control)), 
##     family = Gamma(link = "log"), data = crabs)
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    7.494e-02  1.347e-01   0.556 0.580733    
## Jelly.Density -2.112e-04  5.531e-05  -3.817 0.000394 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.5250666)
## 
##     Null deviance: 34.764  on 48  degrees of freedom
## Residual deviance: 27.888  on 47  degrees of freedom
## AIC: 566.77
## 
## Number of Fisher Scoring iterations: 5
plot(CrabMod)

Use predict function to predict model values

predictcrabs<-ggpredict(
  CrabMod,
  terms=c("Jelly.Density"),
  ci.lvl = 0.95,
  type = "fe",
  typical = "mean",
  condition = NULL,
  back.transform = TRUE,
  ppd = FALSE,
  vcov.fun = NULL,
  vcov.type = NULL,
  vcov.args = NULL,
  interval = "confidence")

Plot with original values

CrabPlot<-plot(predictcrabs,add.data = TRUE,dot.size=1.5,dot.alpha=0.65)+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic() +
  scale_x_continuous(breaks=seq(0,7500,1500),expand = c(0, 20), limits = c(-10, 7500)) + 
  scale_y_continuous(breaks=seq(0,600,200),expand = c(0, 20), limits = c(-10, 600))+
  geom_line(size=1.5) +
  geom_ribbon( aes(ymin = conf.low, ymax = conf.high), alpha = .15) +
  xlab(bquote('Jellyfish Biomass ( g /' ~m^3~ ')'))+ylab(bquote('Crabepod Density ( individuals /'~m^3~')'))+
  theme(axis.text=element_text(size=15,colour="black"),
        legend.position=c(0.87, 0.8),
        legend.text=element_text(size=15,colour="black"),
        legend.title=element_text(size=15,colour="black"),
        axis.title=element_text(size=15,colour="black"),
        axis.line = element_line(colour = "black"))+theme(plot.title = element_blank())
CrabPlot

save plot

setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Exp_GLM_crabs.png", plot = CrabPlot, width = 6, height = 5, device='png', dpi=700)

Plot without original values

CrabPlot_nodata<-plot(predictcrabs)+xlab('Jellyfish Biomass ( g /' ~m^3~ ')')+
  ylab(bquote('Crabepod Density ( individuals /'~m^3~')'))+ 
  theme(axis.line = element_line(colour = "black"))+theme_classic() +
  geom_line(size=1) +
  geom_ribbon( aes(ymin = conf.low, ymax = conf.high, color = NULL),
               alpha = .15,show.legend=FALSE) +
  scale_x_continuous(breaks=seq(0,7500,1500),expand = c(0, 20), limits = c(-10, 7500)) + 
  scale_y_continuous(breaks=seq(0,350,70),expand = c(0, 20), limits = c(-10, 350))+
  theme(axis.text=element_text(size=8,colour="black"),
        axis.title=element_text(size=10),
        plot.title=element_blank())+
  theme(legend.position='none')+
  theme(plot.margin=margin(0.5, 0.5, 0.5, 0.5, unit = "cm"))
CrabPlot_nodata

save plot

setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Exp_GLM_crabs_nodata.png", plot = CrabPlot_nodata, width = 6, height = 5, device='png', dpi=700)