library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.6 ✓ dplyr 1.0.4
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
#Read in the processed data.
demogData <- read_csv("data/SI_demographicData.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Year = col_double(),
## Treatment = col_character(),
## Box = col_double(),
## DoY_Incub = col_double(),
## DoY_Hatch = col_double(),
## ClutchSize = col_double(),
## EggsHatched = col_double(),
## nSurvivingDay5 = col_double(),
## nSurvivingDay7 = col_double(),
## nSurvivingDay10 = col_double(),
## nSurvivingDay15 = col_double(),
## ChicksFledged = col_double()
## )
growthData <- read_csv("data/SI_growthData.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Year = col_double(),
## Box = col_double(),
## Treatment = col_character(),
## Ring = col_character(),
## DoY_Hatch = col_double(),
## Day = col_double(),
## TarsusLength = col_double(),
## Weight = col_double()
## )
Here I am calculating duration of incubation, hatching success (%), fledging success, the standardised hatching date and incubation duration, and standardised clutch size. I also make a matrix-column (fledgeSuccessFromHatchingBinom) for the binomial success/failure of fledging success (i.e. number fledged, number failing to fledge). Finally, I make sure that Year is a factor rather than a numeric variable.
demogData <- demogData %>%
mutate(IncubDuration = as.numeric(DoY_Hatch-DoY_Incub)) %>%
mutate(HatchingSuccess = EggsHatched/ClutchSize) %>%
mutate(FledgingSuccessFromLaying = ChicksFledged/ClutchSize) %>%
mutate(FledgingSuccessFromHatching = ChicksFledged/EggsHatched) %>%
mutate(DoY_Incub_Std = (DoY_Incub-mean(DoY_Incub,na.rm=TRUE))/sd(DoY_Incub,na.rm=TRUE)) %>%
mutate(DoY_Hatch_Std = (DoY_Hatch-mean(DoY_Hatch,na.rm=TRUE))/sd(DoY_Hatch,na.rm=TRUE)) %>%
mutate(IncubDuration_Std = (IncubDuration-mean(IncubDuration,na.rm=TRUE))/sd(IncubDuration,na.rm=TRUE)) %>%
mutate(ClutchSize_Std = (ClutchSize-mean(ClutchSize,na.rm=TRUE))/sd(ClutchSize,na.rm=TRUE))%>%
mutate(fledgeSuccessFromHatchingBinom = cbind(ChicksFledged,EggsHatched-ChicksFledged)) %>%
mutate(Year = as.factor(Year))
Before modelling, I need to select the data columns used in the model and omit any rows where there are missing values. This is required because in the modelling I will be comparing models (e.g. anova(model1, model2)) in order to check for the effect of confounding variable. To do this type of comparison, the data sets must be identical. The default behaviour of R is for models to omit rows of data if there are NAs in the variables in the model. Therefore, if model 1 has variable with an NA, but model 2 does not, then the datasets in each model will be slightly different. I use na.omit to take care of that.
So, here’s how I get the modelling data:
modellingData <- demogData %>%
select(fledgeSuccessFromHatchingBinom, Treatment, Year, IncubDuration_Std) %>%
na.omit()
First I fit a model that includes Treatment and Year and the interaction between them (Treatment:Year).
f1 <- fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year
m1 <- glm(f1, data = modellingData,family="binomial")
summary(m1)
##
## Call:
## glm(formula = f1, family = "binomial", data = modellingData)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.4659 -0.1260 0.8661 1.3949 2.3008
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.6994 0.2638 6.443 1.17e-10 ***
## TreatmentHeated -0.5055 0.3671 -1.377 0.16848
## Year2018 1.3372 0.4942 2.706 0.00682 **
## TreatmentHeated:Year2018 -0.4856 0.6202 -0.783 0.43371
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 204.15 on 63 degrees of freedom
## Residual deviance: 186.07 on 60 degrees of freedom
## AIC: 233.99
##
## Number of Fisher Scoring iterations: 5
This looks OK. Treatment appears not to be significantly associated with fledging success and there is a significant difference in fledging success between years.
But, we have not controlled for the potentially important confounding effects of clutch size and incubation duration. We expect that increased clutch size could lead to reduced fledging success, and increased incubation could result in increased fledging success.
I will control statistically for these two variables by adding them (each in turn) to the model. First, incubation duration:
f2 <- fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year + IncubDuration_Std
m2 <- glm(f2, data = modellingData,family="binomial")
summary(m2)
##
## Call:
## glm(formula = f2, family = "binomial", data = modellingData)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.4990 -0.1627 0.8667 1.4292 2.3119
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.6516 0.2701 6.115 9.63e-10 ***
## TreatmentHeated -0.4969 0.3674 -1.353 0.17620
## Year2018 1.4488 0.5174 2.800 0.00511 **
## IncubDuration_Std 0.1406 0.1998 0.704 0.48158
## TreatmentHeated:Year2018 -0.4990 0.6206 -0.804 0.42138
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 204.15 on 63 degrees of freedom
## Residual deviance: 185.54 on 59 degrees of freedom
## AIC: 235.45
##
## Number of Fisher Scoring iterations: 5
I compare the two models with anova like this:
anova(m1,m2,test="Chi")
## Analysis of Deviance Table
##
## Model 1: fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year
## Model 2: fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year +
## IncubDuration_Std
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 60 186.07
## 2 59 185.54 1 0.53453 0.4647
There is no significant effect of incubation duration. Adding the variable does not change the coefficients very much (see the coefficients and their standard errors in the two models). The anova comparison shows that overall the addition of the IncubDuration_Std term does not significantly change the amount of variance explained by the model.
modellingData2 <- demogData %>%
select(fledgeSuccessFromHatchingBinom, Treatment, Year, ClutchSize_Std) %>%
na.omit()
f1 <- fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year
m1 <- glm(f1, data = modellingData2,family="binomial")
summary(m1)
##
## Call:
## glm(formula = f1, family = "binomial", data = modellingData2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.4659 -0.0975 0.8661 1.4675 2.2562
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.6994 0.2638 6.443 1.17e-10 ***
## TreatmentHeated -0.4610 0.3662 -1.259 0.20801
## Year2018 1.3372 0.4942 2.706 0.00682 **
## TreatmentHeated:Year2018 -0.6381 0.6132 -1.041 0.29809
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 208.21 on 65 degrees of freedom
## Residual deviance: 191.07 on 62 degrees of freedom
## AIC: 240.95
##
## Number of Fisher Scoring iterations: 5
f3 <- fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year + ClutchSize_Std
m3 <- glm(f3, data = modellingData2,family="binomial")
summary(m3)
##
## Call:
## glm(formula = f3, family = "binomial", data = modellingData2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.5351 -0.1066 0.9033 1.4474 2.1513
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.69499 0.26393 6.422 1.34e-10 ***
## TreatmentHeated -0.47404 0.36706 -1.291 0.1965
## Year2018 1.28771 0.50088 2.571 0.0101 *
## ClutchSize_Std 0.09495 0.15292 0.621 0.5347
## TreatmentHeated:Year2018 -0.55721 0.62788 -0.887 0.3748
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 208.21 on 65 degrees of freedom
## Residual deviance: 190.69 on 61 degrees of freedom
## AIC: 242.56
##
## Number of Fisher Scoring iterations: 5
I compare the two models with anova like this:
anova(m1,m3,test="Chi")
## Analysis of Deviance Table
##
## Model 1: fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year
## Model 2: fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year +
## ClutchSize_Std
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 62 191.07
## 2 61 190.69 1 0.3837 0.5356
Again, the coefficients of the models are very similar. The anova shows that adding stanardised clutch size does not improve the model. Therefore, I think we can be satisfied with the model WITHOUT the controls.
Our final model is thus m1
summary(m1)
##
## Call:
## glm(formula = f1, family = "binomial", data = modellingData2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.4659 -0.0975 0.8661 1.4675 2.2562
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.6994 0.2638 6.443 1.17e-10 ***
## TreatmentHeated -0.4610 0.3662 -1.259 0.20801
## Year2018 1.3372 0.4942 2.706 0.00682 **
## TreatmentHeated:Year2018 -0.6381 0.6132 -1.041 0.29809
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 208.21 on 65 degrees of freedom
## Residual deviance: 191.07 on 62 degrees of freedom
## AIC: 240.95
##
## Number of Fisher Scoring iterations: 5
anova(m1,test = "Chi")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: fledgeSuccessFromHatchingBinom
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 65 208.21
## Treatment 1 5.0505 64 203.16 0.0246190 *
## Year 1 10.9684 63 192.19 0.0009268 ***
## Treatment:Year 1 1.1163 62 191.07 0.2907064
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#predict
newData<-expand.grid(Treatment = c("Control","Heated"),Year = as.factor(c(2017,2018)),IncubDuration_Std = 0,DoY_Hatch_Std = 0)
#Predict
inverseFunction <-family(m1)$linkinv
preds <- predict(m1,newdata = newData,se.fit = TRUE)
newData <- newData %>%
mutate(fit_lp = preds$fit,se_lp = preds$se.fit) %>%
mutate(fit = inverseFunction(fit_lp),
ymin = inverseFunction(fit_lp - (1.96*se_lp)),
ymax = inverseFunction(fit_lp + (1.96*se_lp)))
newData
## Treatment Year IncubDuration_Std DoY_Hatch_Std fit_lp se_lp fit
## 1 Control 2017 0 0 1.699386 0.2637717 0.8454545
## 2 Heated 2017 0 0 1.238374 0.2539542 0.7752809
## 3 Control 2018 0 0 3.036554 0.4179310 0.9541985
## 4 Heated 2018 0 0 1.937471 0.2594188 0.8740741
## ymin ymax
## 1 0.7653779 0.9017107
## 2 0.6771323 0.8501941
## 3 0.9018020 0.9792796
## 4 0.8067471 0.9202635
ggplot(newData,aes(x = Treatment, y = fit, ymin = ymin, ymax = ymax, colour = Year)) +
geom_point(position = position_dodge(width = 0.3)) +
geom_errorbar(width = 0.1,position = position_dodge(width = 0.3)) +
ylab("Fledging success")
I would use the same approach with the growth/size analysis.