load packages
library(MASS)
library(ggplot2)
library(ggeffects)
library(dplyr)
library(scales)
Read database into R. If current database changes, just change the path
Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")
First, read in the file that designates different samples to tanks
with jellies other, tanks with zero jellies sampled at the
beginning of the experiment remove, and tanks with zero
jellies sampled at the end of the experiment zero
Control_tanks <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Control_Tanks.csv")
Add control tank data to the database file
Database$Control_info <- Control_tanks$c1[match(Database$Sample.Code, Control_tanks$Sample.Code)]
Subset to trials with large jellies where the number of jellies in the tanks was manipulated
Exp_numb_trials<- subset(Database, Trial.Type=='Number')
Exp_numb_large<-subset(Exp_numb_trials,Jelly.Size=="Large")
remove aug 22 2019 - the jellies we used for this experiment were younger/smaller than the other experiments, so I decided to omit it from analysis
Exp_sub<-subset(Exp_numb_large,Sample.Date!="08/22/2019")
remove tanks sampled at the beginning of experiment - we decided not to use these tanks because they were different than the tanks with zero jellies sampled at the end of the experiment and didn’t add anything beneficial
Exp_sub<-subset(Exp_sub,Control_info!="remove")
#convert sample year to a factor
Exp_sub$Sample.Year<- as.factor(Exp_sub$Sample.Year)
#Rename Vars (get the dots out)
Exp_sub$Jelly.Mass<-Exp_sub$Jelly.Mass..g.
Exp_sub$Density<-Exp_sub$Density....m3.
#calculate jelly density
Exp_sub$Jelly.Density<-Exp_sub$Jelly.Mass/Exp_sub$Vol.Filtered..m3.
#add 1 to jelly density for analyses that prohibit zeroes
Exp_sub$Jelly.Density <-Exp_sub$Jelly.Density
colnames(Exp_sub)
## [1] "BugSampleID" "Project" "Sample.Code"
## [4] "Sampling.Group" "Station" "Site"
## [7] "Site.Name" "Basin" "Sub.Basin"
## [10] "Latitude" "Longitude" "Sample.Date"
## [13] "Sample.Year" "Sample.Month" "Sample.Time"
## [16] "Tow.Type" "Mesh.Size" "Station.Depth..m."
## [19] "Flow.meter..revs." "Broad.Group" "Mid.Level.Group"
## [22] "X1st.Word.Taxa" "Genus.species" "Life.History.Stage"
## [25] "Total.Ct" "Density....m3." "Vol.Filtered..m3."
## [28] "Jelly.Mass..g." "Number.of.Jellies" "Trial.Time"
## [31] "Trial.Type" "Jelly.Size" "Jelly.Density....m3."
## [34] "Jelly.Density..g.m3." "Location" "Control_info"
## [37] "Jelly.Mass" "Density" "Jelly.Density"
Subset data:The mean of jelly density is taken because this should be the same number for all taxa in each tank.
AllCop<-subset(Exp_sub, Broad.Group == "Copepod")
AllCop <- AllCop %>%
group_by(Sample.Code, Station,Sample.Date,Sample.Year,Control_info) %>%
summarise(
CopDensity = sum(Density),
Jelly.Density = mean(Jelly.Density))
Calculate the geometric mean of the zero-jelly tanks to use as a control.
AllCop$Control_info_combined <- paste(AllCop$Sample.Date, AllCop$Control_info)
AllCop2<- AllCop %>%
group_by(Control_info,Control_info_combined,Sample.Date) %>%
summarise(
ave = exp(mean(log(CopDensity))))
AllCop3<-subset(AllCop2,Control_info=="zero")
# match the geometric mean of zero jelly tanks to each experiment
AllCop$Control<- AllCop3$ave[match(AllCop$Sample.Date, AllCop3$Sample.Date)]
Histograms of Jellyfish Density and Copepod Density
hist(AllCop$CopDensity)
hist(AllCop$Jelly.Density)
See if years are different
boxplot(CopDensity~Sample.Year, data=AllCop)
The years do look different - we may want to include in the model.
Stocking densities in 2019 were higher than they were in 2020.
Plot Jellyfish Density against zooplankton density
ggplot(AllCop, aes(x=Jelly.Density, y=CopDensity)) + geom_point(aes(colour=Sample.Date))
Stocking density varies a lot from experiment to experiment, but there
are no high copepod density values at the high jelly density tanks.
Experiment days with higher stocking densities still had quite a few
copepods left over 50% at the end of the experiments in the high jelly
tanks. This could indicate that they ate to fullness. # Family Options
## Negative Binomial Distribution
#round to integers
AllCop$CopDensity_round<-round(as.numeric(AllCop$CopDensity), 0)
AllCop$Control_round<-round(as.numeric(AllCop$Control), 0)
#run model
AllCop_model1 <- glm.nb(CopDensity_round~Jelly.Density, data = AllCop)
summary(AllCop_model1)
##
## Call:
## glm.nb(formula = CopDensity_round ~ Jelly.Density, data = AllCop,
## init.theta = 1.318787584, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 11.2594948 0.1469756 76.608 < 2e-16 ***
## Jelly.Density -0.0001852 0.0000585 -3.166 0.00155 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.3188) family taken to be 1)
##
## Null deviance: 84.884 on 64 degrees of freedom
## Residual deviance: 72.829 on 63 degrees of freedom
## AIC: 1555.9
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.319
## Std. Err.: 0.209
##
## 2 x log-likelihood: -1549.868
plot(AllCop_model1)
AllCop_model2<-glm(CopDensity~Jelly.Density, data= AllCop, family="gaussian")
summary(AllCop_model2)
##
## Call:
## glm(formula = CopDensity ~ Jelly.Density, family = "gaussian",
## data = AllCop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 81796.863 8122.809 10.070 9.53e-15 ***
## Jelly.Density -12.362 3.233 -3.824 0.000304 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 2316097082)
##
## Null deviance: 1.7978e+11 on 64 degrees of freedom
## Residual deviance: 1.4591e+11 on 63 degrees of freedom
## AIC: 1590
##
## Number of Fisher Scoring iterations: 2
plot(AllCop_model2)
AllCop_model3<-glm(CopDensity~Jelly.Density, data= AllCop, family="Gamma")
summary(AllCop_model3)
##
## Call:
## glm(formula = CopDensity ~ Jelly.Density, family = "Gamma", data = AllCop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.094e-05 1.592e-06 6.873 3.33e-09 ***
## Jelly.Density 5.468e-09 1.320e-09 4.144 0.000104 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.5524674)
##
## Null deviance: 64.369 on 64 degrees of freedom
## Residual deviance: 51.698 on 63 degrees of freedom
## AIC: 1551.5
##
## Number of Fisher Scoring iterations: 6
plot(AllCop_model3)
Like Gamma has best AIC. QQplot looks worse than gaussian, but I think
that will be accounted for with control variable.
AllCop_model4<-glm(CopDensity~Jelly.Density, data= AllCop, family="Gamma"(link="log"))
summary(AllCop_model4)
##
## Call:
## glm(formula = CopDensity ~ Jelly.Density, family = Gamma(link = "log"),
## data = AllCop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.126e+01 1.287e-01 87.465 < 2e-16 ***
## Jelly.Density -1.852e-04 5.124e-05 -3.614 0.000599 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.581719)
##
## Null deviance: 64.369 on 64 degrees of freedom
## Residual deviance: 55.227 on 63 degrees of freedom
## AIC: 1556.4
##
## Number of Fisher Scoring iterations: 10
plot(AllCop_model4)
AllCop_model3<-glm(CopDensity~Jelly.Density+offset(log(Control)), data= AllCop, family="Gamma"(link="log"))
summary(AllCop_model3)
##
## Call:
## glm(formula = CopDensity ~ Jelly.Density + offset(log(Control)),
## family = Gamma(link = "log"), data = AllCop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.697e-02 5.470e-02 -1.773 0.0811 .
## Jelly.Density -2.210e-04 2.177e-05 -10.150 6.99e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.1050273)
##
## Null deviance: 18.2931 on 64 degrees of freedom
## Residual deviance: 7.5334 on 63 degrees of freedom
## AIC: 1419.1
##
## Number of Fisher Scoring iterations: 7
plot(AllCop_model3)
looks really good with control!
AllCop_model5<-glm(CopDensity~Sample.Year+Jelly.Density+offset(log(Control)), data= AllCop, family="Gamma"(link="log"))
summary(AllCop_model5)
##
## Call:
## glm(formula = CopDensity ~ Sample.Year + Jelly.Density + offset(log(Control)),
## family = Gamma(link = "log"), data = AllCop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.648e-03 7.709e-02 -0.034 0.9727
## Sample.Year2020 -1.454e-01 8.326e-02 -1.746 0.0858 .
## Jelly.Density -2.305e-04 2.242e-05 -10.284 5e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.1067262)
##
## Null deviance: 18.2931 on 64 degrees of freedom
## Residual deviance: 7.2167 on 62 degrees of freedom
## AIC: 1418.3
##
## Number of Fisher Scoring iterations: 6
plot(AllCop_model5)
Year doesn’t improve the model much, so I’ll move on without it
AllCopMod<-glm(CopDensity~Jelly.Density+offset(log(Control)), data= AllCop, family="Gamma"(link="log"))
summary(AllCopMod)
##
## Call:
## glm(formula = CopDensity ~ Jelly.Density + offset(log(Control)),
## family = Gamma(link = "log"), data = AllCop)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.697e-02 5.470e-02 -1.773 0.0811 .
## Jelly.Density -2.210e-04 2.177e-05 -10.150 6.99e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.1050273)
##
## Null deviance: 18.2931 on 64 degrees of freedom
## Residual deviance: 7.5334 on 63 degrees of freedom
## AIC: 1419.1
##
## Number of Fisher Scoring iterations: 7
plot(AllCopMod)
Use predict function to predict model values
predictcop<-ggpredict(
AllCopMod,
terms=c("Jelly.Density"),
ci.lvl = 0.95,
type = "fe",
typical = "mean",
condition = NULL,
back.transform = TRUE,
ppd = FALSE,
vcov.fun = NULL,
vcov.type = NULL,
vcov.args = NULL,
interval = "confidence")
Plot with original values
AllCopPlot<-plot(predictcop,add.data = TRUE)+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic() +
geom_line(size=1.5) +
geom_ribbon( aes(ymin = conf.low, ymax = conf.high), alpha = .15) +
xlab(bquote('Jellyfish Biomass ( g /' ~m^3~ ')'))+ylab(bquote('Copepod Density ( individuals /'~m^3~')'))+
theme(axis.text=element_text(size=15,colour="black"),
legend.position=c(0.87, 0.8),
legend.text=element_text(size=15,colour="black"),
legend.title=element_text(size=15,colour="black"),
axis.title=element_text(size=15,colour="black"),
axis.line = element_line(colour = "black"))+theme(plot.title = element_blank())
AllCopPlot
save plot
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Exp_GLM_AllCop.png", plot = AllCopPlot, width = 6, height = 5, device='png', dpi=700)
Plot without original values
AllCopPlot_nodata<-plot(predictcop)+xlab('Jellyfish Biomass ( g /' ~m^3~ ')')+
ylab(bquote('Copepod Density ( individuals /'~m^3~')'))+
theme(axis.line = element_line(colour = "black"))+theme_classic() +
geom_line(size=1) +
geom_ribbon( aes(ymin = conf.low, ymax = conf.high, color = NULL),
alpha = .15,show.legend=FALSE) +
scale_x_continuous(breaks=seq(0,7500,1500),expand = c(0, 20), limits = c(-10, 7500),labels=label_comma()) +
scale_y_continuous(breaks=seq(0,100000,20000),expand = c(0, 20), limits = c(-10, 100000),labels=label_comma())+
theme(axis.text=element_text(size=8,colour="black"),
axis.title=element_text(size=10),
plot.title=element_blank())+
theme(legend.position='none')+
theme(plot.margin=margin(0.5, 0.5, 0.5, 0.5, unit = "cm"))
AllCopPlot_nodata
save plot
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Exp_GLM_AllCop_nodata.png", plot = AllCopPlot_nodata, width = 6, height = 5, device='png', dpi=700)