load packages
library(MASS)
library(ggplot2)
library(ggeffects)
library(dplyr)
library(scales)
library(forcats)
Read database into R. If current database changes, just change the path
Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")
First, read in the file that designates different samples to tanks
with jellies other, tanks with zero jellies sampled at the
beginning of the experiment remove, and tanks with zero
jellies sampled at the end of the experiment zero
Control_tanks <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Control_Tanks.csv")
Add control tank data to the database file
Database$Control_info <- Control_tanks$c1[match(Database$Sample.Code, Control_tanks$Sample.Code)]
Subset to trials with large jellies where the number of jellies in the tanks was manipulated
Exp_numb_trials<- subset(Database, Trial.Type=='Number')
Exp_numb_large<-subset(Exp_numb_trials,Jelly.Size=="Large")
remove aug 22 2019 - the jellies we used for this experiment were younger/smaller than the other experiments, so I decided to omit it from analysis
Exp_sub<-subset(Exp_numb_large,Sample.Date!="08/22/2019")
remove tanks sampled at the beginning of experiment - we decided not to use these tanks because they were different than the tanks with zero jellies sampled at the end of the experiment and didn’t add anything beneficial
Exp_sub<-subset(Exp_sub,Control_info!="remove")
#convert sample year to a factor
Exp_sub$Sample.Year<- as.factor(Exp_sub$Sample.Year)
#Rename Vars (get the dots out)
Exp_sub$Jelly.Mass<-Exp_sub$Jelly.Mass..g.
Exp_sub$Density<-Exp_sub$Density....m3.
#calculate jelly density
Exp_sub$Jelly.Density<-Exp_sub$Jelly.Mass/Exp_sub$Vol.Filtered..m3.
#add 1 to jelly density for analyses that prohibit zeroes
Exp_sub$Jelly.Density <-Exp_sub$Jelly.Density
colnames(Exp_sub)
## [1] "BugSampleID" "Project" "Sample.Code"
## [4] "Sampling.Group" "Station" "Site"
## [7] "Site.Name" "Basin" "Sub.Basin"
## [10] "Latitude" "Longitude" "Sample.Date"
## [13] "Sample.Year" "Sample.Month" "Sample.Time"
## [16] "Tow.Type" "Mesh.Size" "Station.Depth..m."
## [19] "Flow.meter..revs." "Broad.Group" "Mid.Level.Group"
## [22] "X1st.Word.Taxa" "Genus.species" "Life.History.Stage"
## [25] "Total.Ct" "Density....m3." "Vol.Filtered..m3."
## [28] "Jelly.Mass..g." "Number.of.Jellies" "Trial.Time"
## [31] "Trial.Type" "Jelly.Size" "Jelly.Density....m3."
## [34] "Jelly.Density..g.m3." "Location" "Control_info"
## [37] "Jelly.Mass" "Density" "Jelly.Density"
Combine species and life history stage
Exp_sub$Species_lifestage <- paste(Exp_sub$Genus.species, Exp_sub$Life.History.Stage, sep="_")
Recode Acartia as medium calanoids
Exp_sub <- Exp_sub %>%
mutate(Species_lifestage_combined = fct_recode(Species_lifestage,
"CALANOIDA_Medium" = "ACARTIA_Copepodite",
"CALANOIDA_Medium" = "ACARTIA_Female, Adult"))
Subset data to medium calanoids and add multiple entries per tank:The mean of jelly density is taken because this should be the same number for all taxa in each tank.
MedCal<-subset(Exp_sub, Species_lifestage_combined == "CALANOIDA_Medium")
MedCal <- MedCal %>%
group_by(Sample.Code, Station,Sample.Date,Sample.Year,Control_info) %>%
summarise(
CopDensity = sum(Density),
Jelly.Density = mean(Jelly.Density))
Calculate the geometric mean of the zero-jelly tanks to use as a control.
MedCal$Control_info_combined <- paste(MedCal$Sample.Date, MedCal$Control_info)
MedCal2<- MedCal %>%
group_by(Control_info,Control_info_combined,Sample.Date) %>%
summarise(
ave = exp(mean(log(CopDensity))))
MedCal3<-subset(MedCal2,Control_info=="zero")
# match the geometric mean of zero jelly tanks to each experiment
MedCal$Control<- MedCal3$ave[match(MedCal$Sample.Date, MedCal3$Sample.Date)]
Histograms of Jellyfish Density and Copepod Density
hist(MedCal$CopDensity)
hist(MedCal$Jelly.Density)
Both appear to have a right-skewed a log distribution
See if years are different
boxplot(CopDensity~Sample.Year, data=MedCal)
The two years do look different - may need to include this in the model
later on. There are some outliers from one experiment
Plot Jellyfish Density against zooplankton density
ggplot(MedCal, aes(x=Jelly.Density, y=CopDensity)) + geom_point(aes(colour=Sample.Date))
#round to integers
MedCal$CopDensity_round<-round(as.numeric(MedCal$CopDensity), 0)
MedCal$Control_round<-round(as.numeric(MedCal$Control), 0)
#run model
MedCal_model1 <- glm.nb(CopDensity_round~Jelly.Density, data = MedCal)
summary(MedCal_model1)
##
## Call:
## glm.nb(formula = CopDensity_round ~ Jelly.Density, data = MedCal,
## init.theta = 0.7801044884, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 9.846e+00 1.911e-01 51.52 <2e-16 ***
## Jelly.Density -1.521e-04 7.607e-05 -2.00 0.0455 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.7801) family taken to be 1)
##
## Null deviance: 82.016 on 64 degrees of freedom
## Residual deviance: 77.405 on 63 degrees of freedom
## AIC: 1379.4
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.780
## Std. Err.: 0.118
##
## 2 x log-likelihood: -1373.439
plot(MedCal_model1)
## Gaussian distribution
MedCal_model2<-glm(CopDensity~Jelly.Density, data= MedCal, family="gaussian")
summary(MedCal_model2)
##
## Call:
## glm(formula = CopDensity ~ Jelly.Density, family = "gaussian",
## data = MedCal)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19497.088 3688.811 5.285 1.67e-06 ***
## Jelly.Density -2.490 1.468 -1.696 0.0949 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 477658034)
##
## Null deviance: 3.1466e+10 on 64 degrees of freedom
## Residual deviance: 3.0092e+10 on 63 degrees of freedom
## AIC: 1487.4
##
## Number of Fisher Scoring iterations: 2
plot(MedCal_model2)
## Gamma distribution
MedCal_model3<-glm(CopDensity~Jelly.Density, data= MedCal, family="Gamma")
summary(MedCal_model3)
##
## Call:
## glm(formula = CopDensity ~ Jelly.Density, family = "Gamma", data = MedCal)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.759e-05 1.251e-05 3.804 0.000324 ***
## Jelly.Density 1.566e-08 8.720e-09 1.795 0.077396 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 1.885926)
##
## Null deviance: 105.149 on 64 degrees of freedom
## Residual deviance: 97.551 on 63 degrees of freedom
## AIC: 1379.4
##
## Number of Fisher Scoring iterations: 7
plot(MedCal_model3)
looks about the same as NB, proceed with Gamma
MedCal_model4<-glm(CopDensity~Jelly.Density, data= MedCal, family="Gamma"(link="log"))
summary(MedCal_model4)
##
## Call:
## glm(formula = CopDensity ~ Jelly.Density, family = Gamma(link = "log"),
## data = MedCal)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.846e+00 2.304e-01 42.731 <2e-16 ***
## Jelly.Density -1.521e-04 9.172e-05 -1.659 0.102
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 1.863884)
##
## Null deviance: 105.15 on 64 degrees of freedom
## Residual deviance: 99.24 on 63 degrees of freedom
## AIC: 1380.7
##
## Number of Fisher Scoring iterations: 6
plot(MedCal_model4)
about the same as the identity link. However, I have not been able to
add a control factor with the identity link, so I will proceed with log
link. ## log link and offset
MedCal_model3<-glm(CopDensity~Jelly.Density+offset(log(Control)), data= MedCal, family="Gamma"(link="log"))
summary(MedCal_model3)
##
## Call:
## glm(formula = CopDensity ~ Jelly.Density + offset(log(Control)),
## family = Gamma(link = "log"), data = MedCal)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.973e-01 6.903e-02 -2.859 0.00576 **
## Jelly.Density -1.404e-04 2.748e-05 -5.111 3.22e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.1672518)
##
## Null deviance: 16.898 on 64 degrees of freedom
## Residual deviance: 11.999 on 63 degrees of freedom
## AIC: 1229.7
##
## Number of Fisher Scoring iterations: 8
plot(MedCal_model3)
Adding control improves the AIC and distribution a lot!!
MedCal_model5<-glm(CopDensity~Sample.Year+Jelly.Density+offset(log(Control)), data= MedCal, family="Gamma"(link="log"))
summary(MedCal_model5)
##
## Call:
## glm(formula = CopDensity ~ Sample.Year + Jelly.Density + offset(log(Control)),
## family = Gamma(link = "log"), data = MedCal)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.657e-02 9.712e-02 -0.479 0.633
## Sample.Year2020 -2.287e-01 1.049e-01 -2.181 0.033 *
## Jelly.Density -1.580e-04 2.824e-05 -5.595 5.33e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.1693569)
##
## Null deviance: 16.898 on 64 degrees of freedom
## Residual deviance: 11.241 on 62 degrees of freedom
## AIC: 1227.4
##
## Number of Fisher Scoring iterations: 7
plot(MedCal_model5)
I had included year in the past, but it actually doesn’t look like it
adds much, so I will not include it moving forward. Although the years
were significantly different, I realized that adding it is a little
arbitrary, since the control parameter already accounts for starting
density. Therefore, years with lower overall copepod abundance are
already accounted for.
MedCalMod<-glm(CopDensity~Jelly.Density+offset(log(Control)), data= MedCal, family="Gamma"(link="log"))
summary(MedCalMod)
##
## Call:
## glm(formula = CopDensity ~ Jelly.Density + offset(log(Control)),
## family = Gamma(link = "log"), data = MedCal)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.973e-01 6.903e-02 -2.859 0.00576 **
## Jelly.Density -1.404e-04 2.748e-05 -5.111 3.22e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.1672518)
##
## Null deviance: 16.898 on 64 degrees of freedom
## Residual deviance: 11.999 on 63 degrees of freedom
## AIC: 1229.7
##
## Number of Fisher Scoring iterations: 8
plot(MedCalMod)
extract rate of change estimates with confidence intervals
exp(coef(MedCalMod))
## (Intercept) Jelly.Density
## 0.8209125 0.9998596
exp(confint(MedCalMod, level=.95))
## 2.5 % 97.5 %
## (Intercept) 0.7234391 0.9358889
## Jelly.Density 0.9998125 0.9999085
#get rate of change (1-exp(coefficient))
1-0.9998596
## [1] 0.0001404
Calculate rate of change
a<-exp(-1.404e-04)
a
## [1] 0.9998596
b<-a-1
c<-b*100
c
## [1] -0.01403901
Use predict function to predict model values
predictmedcal<-ggpredict(
MedCalMod,
terms=c("Jelly.Density"),
ci.lvl = 0.95,
type = "fe",
typical = "mean",
condition = NULL,
back.transform = TRUE,
ppd = FALSE,
vcov.fun = NULL,
vcov.type = NULL,
vcov.args = NULL,
interval = "confidence")
Plot with original values
MedCalPlot<-plot(predictmedcal,add.data = TRUE,dot.size=1.5,dot.alpha=0.65)+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic() +
geom_line(size=1.5) +
geom_ribbon( aes(ymin = conf.low, ymax = conf.high), alpha = .15) +
scale_x_continuous(breaks=seq(0,7500,1500),expand = c(0, 20), limits = c(-10, 7500)) +
scale_y_continuous(breaks=seq(0,125000,25000),expand = c(0, 20), limits = c(-10, 125000))+
xlab(bquote('Jellyfish Biomass ( g /' ~m^3~ ')'))+ylab(bquote('Med. Calanoid Density ( # /'~m^3~')'))+
theme(axis.text=element_text(size=15,colour="black"),
legend.position=c(0.87, 0.8),
legend.text=element_text(size=15,colour="black"),
legend.title=element_text(size=15,colour="black"),
axis.title=element_text(size=15,colour="black"),
axis.line = element_line(colour = "black"))+theme(plot.title = element_blank())+
theme(plot.margin=margin(0.7, 0.7, 0.7, 0.7, unit = "cm"))
MedCalPlot
save plot
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Exp_GLM_MedCal.png", plot = MedCalPlot, width = 6, height = 5, device='png', dpi=700)
Plot without original values
MedCalPlot_nodata<-plot(predictmedcal)+xlab('Jellyfish Biomass ( g /' ~m^3~ ')')+
ylab(bquote('Copepod Density ( individuals /'~m^3~')'))+
theme(axis.line = element_line(colour = "black"))+theme_classic() +
geom_line(size=1) +
geom_ribbon( aes(ymin = conf.low, ymax = conf.high, color = NULL),
alpha = .15,show.legend=FALSE) +
scale_x_continuous(breaks=seq(0,7500,1500),expand = c(0, 20), limits = c(-10, 7500),labels=label_comma()) +
scale_y_continuous(breaks=seq(0,25000,5000),expand = c(0, 20), limits = c(-10, 25000),labels=label_comma())+
theme(axis.text=element_text(size=8,colour="black"),
axis.title=element_text(size=10),
plot.title=element_blank())+
theme(legend.position='none')+
theme(plot.margin=margin(0.5, 0.5, 0.5, 0.5, unit = "cm"))
MedCalPlot_nodata
save plot
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Exp_GLM_MedCal_nodata.png", plot = MedCalPlot_nodata, width = 6, height = 5, device='png', dpi=700)