load packages
library(MASS)
library(ggplot2)
library(ggeffects)
library(dplyr)
library(scales)
Read database into R. If current database changes, just change the path
Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")
First, read in the file that designates different samples to tanks
with jellies other, tanks with zero jellies sampled at the
beginning of the experiment remove, and tanks with zero
jellies sampled at the end of the experiment zero
Control_tanks <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Control_Tanks.csv")
Add control tank data to the database file
Database$Control_info <- Control_tanks$c1[match(Database$Sample.Code, Control_tanks$Sample.Code)]
Subset to trials with large jellies where the number of jellies in the tanks was manipulated
Exp_numb_trials<- subset(Database, Trial.Type=='Number')
Exp_numb_large<-subset(Exp_numb_trials,Jelly.Size=="Large")
remove aug 22 2019 - the jellies we used for this experiment were younger/smaller than the other experiments, so I decided to omit it from analysis
Exp_sub<-subset(Exp_numb_large,Sample.Date!="08/22/2019")
remove tanks sampled at the beginning of experiment - we decided not to use these tanks because they were different than the tanks with zero jellies sampled at the end of the experiment and didn’t add anything beneficial
Exp_sub<-subset(Exp_sub,Control_info!="remove")
#convert sample year to a factor
Exp_sub$Sample.Year<- as.factor(Exp_sub$Sample.Year)
#Rename Vars (get the dots out)
Exp_sub$Jelly.Mass<-Exp_sub$Jelly.Mass..g.
Exp_sub$Density<-Exp_sub$Density....m3.
#calculate jelly density
Exp_sub$Jelly.Density<-Exp_sub$Jelly.Mass/Exp_sub$Vol.Filtered..m3.
#add 1 to jelly density for analyses that prohibit zeroes
Exp_sub$Jelly.Density <-Exp_sub$Jelly.Density
colnames(Exp_sub)
## [1] "BugSampleID" "Project" "Sample.Code"
## [4] "Sampling.Group" "Station" "Site"
## [7] "Site.Name" "Basin" "Sub.Basin"
## [10] "Latitude" "Longitude" "Sample.Date"
## [13] "Sample.Year" "Sample.Month" "Sample.Time"
## [16] "Tow.Type" "Mesh.Size" "Station.Depth..m."
## [19] "Flow.meter..revs." "Broad.Group" "Mid.Level.Group"
## [22] "X1st.Word.Taxa" "Genus.species" "Life.History.Stage"
## [25] "Total.Ct" "Density....m3." "Vol.Filtered..m3."
## [28] "Jelly.Mass..g." "Number.of.Jellies" "Trial.Time"
## [31] "Trial.Type" "Jelly.Size" "Jelly.Density....m3."
## [34] "Jelly.Density..g.m3." "Location" "Control_info"
## [37] "Jelly.Mass" "Density" "Jelly.Density"
Subset data to crabs and add multiple entries per tank:The mean of jelly density is taken because this should be the same number for all taxa in each tank.
crabs<-subset(Exp_sub, Genus.species == "CRABS")
crabs <- crabs %>%
group_by(Sample.Code, Station,Sample.Date,Sample.Year,Control_info) %>%
summarise(
CrabDensity = sum(Density),
Jelly.Density = mean(Jelly.Density))
Calculate the geometric mean of the zero-jelly tanks to use as a control.
crabs$Control_info_combined <- paste(crabs$Sample.Date, crabs$Control_info)
crabs2<- crabs %>%
group_by(Control_info,Control_info_combined,Sample.Date) %>%
summarise(
ave = exp(mean(log(CrabDensity))))
crabs3<-subset(crabs2,Control_info=="zero")
# match the geometric mean of zero jelly tanks to each experiment
crabs$Control<- crabs3$ave[match(crabs$Sample.Date, crabs3$Sample.Date)]
Histograms of Jellyfish Density and Crabepod Density
hist(crabs$CrabDensity)
hist(crabs$Jelly.Density)
log distribution for both
See if years are different
boxplot(CrabDensity~Sample.Year, data=crabs)
Plot Jellyfish Density against crab density
ggplot(crabs, aes(x=Jelly.Density, y=CrabDensity)) + geom_point(aes(colour=Sample.Date))
#round to integers
crabs$CrabDensity_round<-round(as.numeric(crabs$CrabDensity), 0)
crabs$Control_round<-round(as.numeric(crabs$Control), 0)
#run model
crabs_model1 <- glm.nb(CrabDensity_round~Jelly.Density, data = crabs)
summary(crabs_model1)
##
## Call:
## glm.nb(formula = CrabDensity_round ~ Jelly.Density, data = crabs,
## init.theta = 1.312683768, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.224e+00 1.630e-01 32.053 <2e-16 ***
## Jelly.Density -1.598e-04 6.712e-05 -2.381 0.0173 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.3127) family taken to be 1)
##
## Null deviance: 61.491 on 48 degrees of freedom
## Residual deviance: 54.779 on 47 degrees of freedom
## AIC: 589.89
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.313
## Std. Err.: 0.244
##
## 2 x log-likelihood: -583.885
plot(crabs_model1)
crabs_model2<-glm(CrabDensity~Jelly.Density, data= crabs, family="gaussian")
summary(crabs_model2)
##
## Call:
## glm(formula = CrabDensity ~ Jelly.Density, family = "gaussian",
## data = crabs)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 193.823279 24.008812 8.073 1.98e-10 ***
## Jelly.Density -0.026484 0.009855 -2.687 0.00993 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 16669.99)
##
## Null deviance: 903874 on 48 degrees of freedom
## Residual deviance: 783489 on 47 degrees of freedom
## AIC: 619.36
##
## Number of Fisher Scoring iterations: 2
plot(crabs_model2)
crabs_model3<-glm(CrabDensity~Jelly.Density, data= crabs, family="Gamma")
summary(crabs_model3)
##
## Call:
## glm(formula = CrabDensity ~ Jelly.Density, family = "Gamma",
## data = crabs)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.730e-03 8.161e-04 5.796 5.48e-07 ***
## Jelly.Density 1.920e-06 6.724e-07 2.856 0.00637 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.6614244)
##
## Null deviance: 47.944 on 48 degrees of freedom
## Residual deviance: 40.653 on 47 degrees of freedom
## AIC: 587.28
##
## Number of Fisher Scoring iterations: 6
plot(crabs_model3)
crabs_model4<-glm(CrabDensity~Jelly.Density, data= crabs, family="Gamma"(link="log"))
summary(crabs_model4)
##
## Call:
## glm(formula = CrabDensity ~ Jelly.Density, family = Gamma(link = "log"),
## data = crabs)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.223e+00 1.532e-01 34.084 <2e-16 ***
## Jelly.Density -1.594e-04 6.291e-05 -2.534 0.0147 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.6791679)
##
## Null deviance: 47.944 on 48 degrees of freedom
## Residual deviance: 42.784 on 47 degrees of freedom
## AIC: 590.12
##
## Number of Fisher Scoring iterations: 9
plot(crabs_model4)
about the same as the identity link. However, I have not been able to
add a control factor with the identity link, so I will proceed with log
link.
crabs_model3<-glm(CrabDensity~Jelly.Density+offset(log(Control)), data= crabs, family="Gamma"(link="log"))
summary(crabs_model3)
##
## Call:
## glm(formula = CrabDensity ~ Jelly.Density + offset(log(Control)),
## family = Gamma(link = "log"), data = crabs)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.494e-02 1.347e-01 0.556 0.580733
## Jelly.Density -2.112e-04 5.531e-05 -3.817 0.000394 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.5250666)
##
## Null deviance: 34.764 on 48 degrees of freedom
## Residual deviance: 27.888 on 47 degrees of freedom
## AIC: 566.77
##
## Number of Fisher Scoring iterations: 5
plot(crabs_model3)
Best fit so far! Proceed with this model.
CrabMod<-glm(CrabDensity~Jelly.Density+offset(log(Control)), data= crabs, family="Gamma"(link="log"))
summary(CrabMod)
##
## Call:
## glm(formula = CrabDensity ~ Jelly.Density + offset(log(Control)),
## family = Gamma(link = "log"), data = crabs)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.494e-02 1.347e-01 0.556 0.580733
## Jelly.Density -2.112e-04 5.531e-05 -3.817 0.000394 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.5250666)
##
## Null deviance: 34.764 on 48 degrees of freedom
## Residual deviance: 27.888 on 47 degrees of freedom
## AIC: 566.77
##
## Number of Fisher Scoring iterations: 5
plot(CrabMod)
Use predict function to predict model values
predictcrabs<-ggpredict(
CrabMod,
terms=c("Jelly.Density"),
ci.lvl = 0.95,
type = "fe",
typical = "mean",
condition = NULL,
back.transform = TRUE,
ppd = FALSE,
vcov.fun = NULL,
vcov.type = NULL,
vcov.args = NULL,
interval = "confidence")
Plot with original values
CrabPlot<-plot(predictcrabs,add.data = TRUE,dot.size=1.5,dot.alpha=0.65)+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic() +
scale_x_continuous(breaks=seq(0,7500,1500),expand = c(0, 20), limits = c(-10, 7500)) +
scale_y_continuous(breaks=seq(0,600,200),expand = c(0, 20), limits = c(-10, 600))+
geom_line(size=1.5) +
geom_ribbon( aes(ymin = conf.low, ymax = conf.high), alpha = .15) +
xlab(bquote('Jellyfish Biomass ( g /' ~m^3~ ')'))+ylab(bquote('Crabepod Density ( individuals /'~m^3~')'))+
theme(axis.text=element_text(size=15,colour="black"),
legend.position=c(0.87, 0.8),
legend.text=element_text(size=15,colour="black"),
legend.title=element_text(size=15,colour="black"),
axis.title=element_text(size=15,colour="black"),
axis.line = element_line(colour = "black"))+theme(plot.title = element_blank())
CrabPlot
save plot
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Exp_GLM_crabs.png", plot = CrabPlot, width = 6, height = 5, device='png', dpi=700)
Plot without original values
CrabPlot_nodata<-plot(predictcrabs)+xlab('Jellyfish Biomass ( g /' ~m^3~ ')')+
ylab(bquote('Crabepod Density ( individuals /'~m^3~')'))+
theme(axis.line = element_line(colour = "black"))+theme_classic() +
geom_line(size=1) +
geom_ribbon( aes(ymin = conf.low, ymax = conf.high, color = NULL),
alpha = .15,show.legend=FALSE) +
scale_x_continuous(breaks=seq(0,7500,1500),expand = c(0, 20), limits = c(-10, 7500)) +
scale_y_continuous(breaks=seq(0,350,70),expand = c(0, 20), limits = c(-10, 350))+
theme(axis.text=element_text(size=8,colour="black"),
axis.title=element_text(size=10),
plot.title=element_blank())+
theme(legend.position='none')+
theme(plot.margin=margin(0.5, 0.5, 0.5, 0.5, unit = "cm"))
CrabPlot_nodata
save plot
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Exp_GLM_crabs_nodata.png", plot = CrabPlot_nodata, width = 6, height = 5, device='png', dpi=700)