load packages
library(MASS)
library(ggplot2)
library(ggeffects)
library(dplyr)
Read database into R. If current database changes, just change the path
Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")
First, read in the file that designates different samples to tanks
with jellies other, tanks with zero jellies sampled at the
beginning of the experiment remove, and tanks with zero
jellies sampled at the end of the experiment zero
Control_tanks <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Control_Tanks.csv")
Add control tank data to the database file
Database$Control_info <- Control_tanks$c1[match(Database$Sample.Code, Control_tanks$Sample.Code)]
Subset to trials with large jellies where the number of jellies in the tanks was manipulated
Exp_numb_trials<- subset(Database, Trial.Type=='Number')
Exp_numb_large<-subset(Exp_numb_trials,Jelly.Size=="Large")
remove aug 22 2019 - the jellies we used for this experiment were younger/smaller than the other experiments, so I decided to omit it from analysis
Exp_sub<-subset(Exp_numb_large,Sample.Date!="08/22/2019")
remove tanks sampled at the beginning of experiment - we decided not to use these tanks because they were different than the tanks with zero jellies sampled at the end of the experiment and didn’t add anything beneficial
Exp_sub<-subset(Exp_sub,Control_info!="remove")
#convert sample year to a factor
Exp_sub$Sample.Year<- as.factor(Exp_sub$Sample.Year)
#Rename Vars (get the dots out)
Exp_sub$Jelly.Mass<-Exp_sub$Jelly.Mass..g.
Exp_sub$Density<-Exp_sub$Density....m3.
#calculate jelly density
Exp_sub$Jelly.Density<-Exp_sub$Jelly.Mass/Exp_sub$Vol.Filtered..m3.
#add 1 to jelly density for analyses that prohibit zeroes
Exp_sub$Jelly.Density <-Exp_sub$Jelly.Density
colnames(Exp_sub)
## [1] "BugSampleID" "Project" "Sample.Code"
## [4] "Sampling.Group" "Station" "Site"
## [7] "Site.Name" "Basin" "Sub.Basin"
## [10] "Latitude" "Longitude" "Sample.Date"
## [13] "Sample.Year" "Sample.Month" "Sample.Time"
## [16] "Tow.Type" "Mesh.Size" "Station.Depth..m."
## [19] "Flow.meter..revs." "Broad.Group" "Mid.Level.Group"
## [22] "X1st.Word.Taxa" "Genus.species" "Life.History.Stage"
## [25] "Total.Ct" "Density....m3." "Vol.Filtered..m3."
## [28] "Jelly.Mass..g." "Number.of.Jellies" "Trial.Time"
## [31] "Trial.Type" "Jelly.Size" "Jelly.Density....m3."
## [34] "Jelly.Density..g.m3." "Location" "Control_info"
## [37] "Jelly.Mass" "Density" "Jelly.Density"
This code summarizes all zooplankton taxa for a total zooplankton metric.
Remove Centric Diatoms
Exp_sub<-subset(Exp_sub,Genus.species!="Diatom-Centric")
Subset data:The mean of jelly density is taken because this should be the same number for all taxa in each tank.
AllZoop <- Exp_sub %>%
group_by(Sample.Code, Station,Sample.Date,Sample.Year,Control_info) %>%
summarise(
ZoopDensity = sum(Density),
Jelly.Density = mean(Jelly.Density))
Calculate the geometric mean of the zero-jelly tanks to use as a control.
AllZoop$Control_info_combined <- paste(AllZoop$Sample.Date, AllZoop$Control_info)
AllZoop2<- AllZoop %>%
group_by(Control_info,Control_info_combined,Sample.Date) %>%
summarise(
ave = exp(mean(log(ZoopDensity))))
AllZoop3<-subset(AllZoop2,Control_info=="zero")
# match the geometric mean of zero jelly tanks to each experiment
AllZoop$Control<- AllZoop3$ave[match(AllZoop$Sample.Date, AllZoop3$Sample.Date)]
Histograms of Jellyfish Density and Zooplankton Density
hist(AllZoop$ZoopDensity)
hist(AllZoop$Jelly.Density)
See if years are different
boxplot(ZoopDensity~Sample.Year, data=AllZoop)
Looks like 2019 and 2020 may be different.
Plot Jellyfish Density against zooplankton density
ggplot(AllZoop, aes(x=Jelly.Density, y=ZoopDensity)) + geom_point(aes(colour=Sample.Date))
At values of low Jellyfish density, zooplankton density has a wide
spread. However, at high jellyfish densities, values are consistently
low. The lowest valeus all belong to the same exeriment, likely
indicating that the overall zooplankton abundance was low that day. This
means that zooplankton stocking density must be incorporated into the
analysis. August 13, 2019 also had very high stocking densities. This
should be considered when interpereting analyses.
#round to integers
AllZoop$ZoopDensity_round<-round(as.numeric(AllZoop$ZoopDensity), 0)
AllZoop$Control_round<-round(as.numeric(AllZoop$Control), 0)
#run model
Allzoop_model1 <- glm.nb(ZoopDensity_round~Jelly.Density, data = AllZoop)
summary(Allzoop_model1)
##
## Call:
## glm.nb(formula = ZoopDensity_round ~ Jelly.Density, data = AllZoop,
## init.theta = 1.635194218, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.135e+01 1.320e-01 85.993 < 2e-16 ***
## Jelly.Density -1.797e-04 5.254e-05 -3.421 0.000624 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.6352) family taken to be 1)
##
## Null deviance: 85.379 on 64 degrees of freedom
## Residual deviance: 71.410 on 63 degrees of freedom
## AIC: 1563.5
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.635
## Std. Err.: 0.263
##
## 2 x log-likelihood: -1557.524
plot(Allzoop_model1)
Allzoop_model2<-glm(ZoopDensity~Jelly.Density, data= AllZoop, family="gaussian")
summary(Allzoop_model2)
##
## Call:
## glm(formula = ZoopDensity ~ Jelly.Density, family = "gaussian",
## data = AllZoop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 89219.744 8317.827 10.73 7.56e-16 ***
## Jelly.Density -13.144 3.311 -3.97 0.000187 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 2428644725)
##
## Null deviance: 1.9128e+11 on 64 degrees of freedom
## Residual deviance: 1.5300e+11 on 63 degrees of freedom
## AIC: 1593.1
##
## Number of Fisher Scoring iterations: 2
plot(Allzoop_model2)
# Simple model, Gamma distribution
Allzoop_model3<-glm(ZoopDensity~Jelly.Density, data= AllZoop, family="Gamma")
summary(Allzoop_model3)
##
## Call:
## glm(formula = ZoopDensity ~ Jelly.Density, family = "Gamma",
## data = AllZoop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.010e-05 1.361e-06 7.416 3.75e-10 ***
## Jelly.Density 4.710e-09 1.094e-09 4.305 5.94e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.4778059)
##
## Null deviance: 52.216 on 64 degrees of freedom
## Residual deviance: 40.530 on 63 degrees of freedom
## AIC: 1558.5
##
## Number of Fisher Scoring iterations: 6
plot(Allzoop_model3)
Gamma looks to be the best - move forward using Gamma.
Allzoop_model4<-glm(ZoopDensity~Jelly.Density, data= AllZoop, family="Gamma"(link="log"))
summary(Allzoop_model4)
##
## Call:
## glm(formula = ZoopDensity ~ Jelly.Density, family = Gamma(link = "log"),
## data = AllZoop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.135e+01 1.203e-01 94.384 < 2e-16 ***
## Jelly.Density -1.797e-04 4.787e-05 -3.755 0.000381 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.5076544)
##
## Null deviance: 52.216 on 64 degrees of freedom
## Residual deviance: 43.673 on 63 degrees of freedom
## AIC: 1563.9
##
## Number of Fisher Scoring iterations: 10
plot(Allzoop_model4)
It looks better without log link, but I will go with log link for now
because the model can’t find valid coefficients with the offset but
without log link.
Allzoop_model3<-glm(ZoopDensity~Jelly.Density+offset(log(Control)), data= AllZoop, family="Gamma"(link="log"))
summary(Allzoop_model3)
##
## Call:
## glm(formula = ZoopDensity ~ Jelly.Density + offset(log(Control)),
## family = Gamma(link = "log"), data = AllZoop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.902e-02 5.203e-02 -1.711 0.092 .
## Jelly.Density -2.111e-04 2.071e-05 -10.194 5.89e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.09502428)
##
## Null deviance: 16.3541 on 64 degrees of freedom
## Residual deviance: 6.6213 on 63 degrees of freedom
## AIC: 1435.2
##
## Number of Fisher Scoring iterations: 6
plot(Allzoop_model3)
# Simple model plus log link, year and offset: gamma distribution
Allzoop_model5<-glm(ZoopDensity~Sample.Year+Jelly.Density+offset(log(Control)), data= AllZoop, family="Gamma"(link="log"))
summary(Allzoop_model5)
##
## Call:
## glm(formula = ZoopDensity ~ Sample.Year + Jelly.Density + offset(log(Control)),
## family = Gamma(link = "log"), data = AllZoop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0273616 0.0735930 -0.372 0.711
## Sample.Year2020 -0.0949337 0.0794754 -1.195 0.237
## Jelly.Density -0.0002171 0.0000214 -10.144 8.58e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.09725202)
##
## Null deviance: 16.3541 on 64 degrees of freedom
## Residual deviance: 6.4853 on 62 degrees of freedom
## AIC: 1435.8
##
## Number of Fisher Scoring iterations: 6
plot(Allzoop_model5)
Year doesn’t add much
ZoopMod<-glm(ZoopDensity~Jelly.Density+offset(log(Control)), data= AllZoop, family="Gamma"(link="log"))
summary(ZoopMod)
##
## Call:
## glm(formula = ZoopDensity ~ Jelly.Density + offset(log(Control)),
## family = Gamma(link = "log"), data = AllZoop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.902e-02 5.203e-02 -1.711 0.092 .
## Jelly.Density -2.111e-04 2.071e-05 -10.194 5.89e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.09502428)
##
## Null deviance: 16.3541 on 64 degrees of freedom
## Residual deviance: 6.6213 on 63 degrees of freedom
## AIC: 1435.2
##
## Number of Fisher Scoring iterations: 6
plot(ZoopMod)
It looks like points 40, 41 and 60 are the outliers
Use predict function to predict model values
predictzoop<-ggpredict(
ZoopMod,
terms=c("Jelly.Density"),
ci.lvl = 0.95,
type = "fe",
typical = "mean",
condition = NULL,
back.transform = TRUE,
ppd = FALSE,
vcov.fun = NULL,
vcov.type = NULL,
vcov.args = NULL,
interval = "confidence")
Plot with original values
ZoopPlot<-plot(predictzoop,add.data = TRUE)+xlab("Jelly Density (g/m3)")+ylab("Copepod Density (#/m3)")+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic() +
geom_line(size=1.5) +
geom_ribbon( aes(ymin = conf.low, ymax = conf.high), alpha = .15) +
xlab("Aurelia Biomass (g/m3)")+ylab("Total Zooplankton (#/m3)")+
theme(axis.text=element_text(size=15,colour="black"),
legend.position=c(0.87, 0.8),
legend.text=element_text(size=15,colour="black"),
legend.title=element_text(size=15,colour="black"),
axis.title=element_text(size=20,colour="black"),
axis.line = element_line(colour = "black"))+theme(plot.title = element_blank())
ZoopPlot
save plot
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Exp_GLM_AllZoops.png", plot = ZoopPlot, width = 10, height = 10, device='png', dpi=700)