Load packages

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Import data

#Read in the processed data.
demogData <- read_csv("data/SI_demographicData.csv")

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Year = col_double(),
##   Treatment = col_character(),
##   Box = col_double(),
##   DoY_Incub = col_double(),
##   DoY_Hatch = col_double(),
##   ClutchSize = col_double(),
##   EggsHatched = col_double(),
##   nSurvivingDay5 = col_double(),
##   nSurvivingDay7 = col_double(),
##   nSurvivingDay10 = col_double(),
##   nSurvivingDay15 = col_double(),
##   ChicksFledged = col_double()
## )

growthData <- read_csv("data/SI_growthData.csv")

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Year = col_double(),
##   Box = col_double(),
##   Treatment = col_character(),
##   Ring = col_character(),
##   DoY_Hatch = col_double(),
##   Day = col_double(),
##   TarsusLength = col_double(),
##   Weight = col_double()
## )

Process data

Here I am calculating duration of incubation, hatching success (%), fledging success, the standardised hatching date and incubation duration, and standardised clutch size. I also make a matrix-column (fledgeSuccessFromHatchingBinom) for the binomial success/failure of fledging success (i.e. number fledged, number failing to fledge). Finally, I make sure that Year is a factor rather than a numeric variable.

demogData <- demogData %>% 
  mutate(IncubDuration = as.numeric(DoY_Hatch-DoY_Incub)) %>% 
  mutate(HatchingSuccess = EggsHatched/ClutchSize) %>%
  mutate(FledgingSuccessFromLaying = ChicksFledged/ClutchSize) %>%
  mutate(FledgingSuccessFromHatching = ChicksFledged/EggsHatched) %>%
  mutate(DoY_Incub_Std = (DoY_Incub-mean(DoY_Incub,na.rm=TRUE))/sd(DoY_Incub,na.rm=TRUE)) %>%
  mutate(DoY_Hatch_Std = (DoY_Hatch-mean(DoY_Hatch,na.rm=TRUE))/sd(DoY_Hatch,na.rm=TRUE)) %>%
  mutate(IncubDuration_Std = (IncubDuration-mean(IncubDuration,na.rm=TRUE))/sd(IncubDuration,na.rm=TRUE)) %>%
  mutate(ClutchSize_Std = (ClutchSize-mean(ClutchSize,na.rm=TRUE))/sd(ClutchSize,na.rm=TRUE))%>% 
  mutate(fledgeSuccessFromHatchingBinom = cbind(ChicksFledged,EggsHatched-ChicksFledged)) %>%
  mutate(Year = as.factor(Year))

Modelling

Before modelling, I need to select the data columns used in the model and omit any rows where there are missing values. This is required because in the modelling I will be comparing models (e.g. anova(model1, model2)) in order to check for the effect of confounding variable. To do this type of comparison, the data sets must be identical. The default behaviour of R is for models to omit rows of data if there are NAs in the variables in the model. Therefore, if model 1 has variable with an NA, but model 2 does not, then the datasets in each model will be slightly different. I use na.omit to take care of that.

So, here’s how I get the modelling data:

modellingData <- demogData %>% 
  select(fledgeSuccessFromHatchingBinom, Treatment, Year, IncubDuration_Std) %>% 
  na.omit()

Incubation duration as a confounding variable

First I fit a model that includes Treatment and Year and the interaction between them (Treatment:Year).

f1 <- fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year 
m1 <- glm(f1, data = modellingData,family="binomial")
summary(m1)

## 
## Call:
## glm(formula = f1, family = "binomial", data = modellingData)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.4659  -0.1260   0.8661   1.3949   2.3008  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.6994     0.2638   6.443 1.17e-10 ***
## TreatmentHeated           -0.5055     0.3671  -1.377  0.16848    
## Year2018                   1.3372     0.4942   2.706  0.00682 ** 
## TreatmentHeated:Year2018  -0.4856     0.6202  -0.783  0.43371    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 204.15  on 63  degrees of freedom
## Residual deviance: 186.07  on 60  degrees of freedom
## AIC: 233.99
## 
## Number of Fisher Scoring iterations: 5

This looks OK. Treatment appears not to be significantly associated with fledging success and there is a significant difference in fledging success between years.

But, we have not controlled for the potentially important confounding effects of clutch size and incubation duration. We expect that increased clutch size could lead to reduced fledging success, and increased incubation could result in increased fledging success.

I will control statistically for these two variables by adding them (each in turn) to the model. First, incubation duration:

f2 <- fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year + IncubDuration_Std
m2 <- glm(f2, data = modellingData,family="binomial")
summary(m2)

## 
## Call:
## glm(formula = f2, family = "binomial", data = modellingData)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.4990  -0.1627   0.8667   1.4292   2.3119  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.6516     0.2701   6.115 9.63e-10 ***
## TreatmentHeated           -0.4969     0.3674  -1.353  0.17620    
## Year2018                   1.4488     0.5174   2.800  0.00511 ** 
## IncubDuration_Std          0.1406     0.1998   0.704  0.48158    
## TreatmentHeated:Year2018  -0.4990     0.6206  -0.804  0.42138    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 204.15  on 63  degrees of freedom
## Residual deviance: 185.54  on 59  degrees of freedom
## AIC: 235.45
## 
## Number of Fisher Scoring iterations: 5

I compare the two models with anova like this:

anova(m1,m2,test="Chi")

## Analysis of Deviance Table
## 
## Model 1: fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year
## Model 2: fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year + 
##     IncubDuration_Std
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1        60     186.07                     
## 2        59     185.54  1  0.53453   0.4647

There is no significant effect of incubation duration. Adding the variable does not change the coefficients very much (see the coefficients and their standard errors in the two models). The anova comparison shows that overall the addition of the IncubDuration_Std term does not significantly change the amount of variance explained by the model.

Clutch size as a confounding variable

modellingData2 <- demogData %>% 
  select(fledgeSuccessFromHatchingBinom, Treatment, Year, ClutchSize_Std) %>% 
  na.omit()

f1 <- fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year 
m1 <- glm(f1, data = modellingData2,family="binomial")
summary(m1)

## 
## Call:
## glm(formula = f1, family = "binomial", data = modellingData2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.4659  -0.0975   0.8661   1.4675   2.2562  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.6994     0.2638   6.443 1.17e-10 ***
## TreatmentHeated           -0.4610     0.3662  -1.259  0.20801    
## Year2018                   1.3372     0.4942   2.706  0.00682 ** 
## TreatmentHeated:Year2018  -0.6381     0.6132  -1.041  0.29809    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 208.21  on 65  degrees of freedom
## Residual deviance: 191.07  on 62  degrees of freedom
## AIC: 240.95
## 
## Number of Fisher Scoring iterations: 5

f3 <- fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year + ClutchSize_Std
m3 <- glm(f3, data = modellingData2,family="binomial")
summary(m3)

## 
## Call:
## glm(formula = f3, family = "binomial", data = modellingData2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.5351  -0.1066   0.9033   1.4474   2.1513  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               1.69499    0.26393   6.422 1.34e-10 ***
## TreatmentHeated          -0.47404    0.36706  -1.291   0.1965    
## Year2018                  1.28771    0.50088   2.571   0.0101 *  
## ClutchSize_Std            0.09495    0.15292   0.621   0.5347    
## TreatmentHeated:Year2018 -0.55721    0.62788  -0.887   0.3748    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 208.21  on 65  degrees of freedom
## Residual deviance: 190.69  on 61  degrees of freedom
## AIC: 242.56
## 
## Number of Fisher Scoring iterations: 5

I compare the two models with anova like this:

anova(m1,m3,test="Chi")

## Analysis of Deviance Table
## 
## Model 1: fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year
## Model 2: fledgeSuccessFromHatchingBinom ~ Treatment + Year + Treatment:Year + 
##     ClutchSize_Std
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1        62     191.07                     
## 2        61     190.69  1   0.3837   0.5356

Again, the coefficients of the models are very similar. The anova shows that adding stanardised clutch size does not improve the model. Therefore, I think we can be satisfied with the model WITHOUT the controls.

Our final model is thus m1

summary(m1)

## 
## Call:
## glm(formula = f1, family = "binomial", data = modellingData2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.4659  -0.0975   0.8661   1.4675   2.2562  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.6994     0.2638   6.443 1.17e-10 ***
## TreatmentHeated           -0.4610     0.3662  -1.259  0.20801    
## Year2018                   1.3372     0.4942   2.706  0.00682 ** 
## TreatmentHeated:Year2018  -0.6381     0.6132  -1.041  0.29809    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 208.21  on 65  degrees of freedom
## Residual deviance: 191.07  on 62  degrees of freedom
## AIC: 240.95
## 
## Number of Fisher Scoring iterations: 5

anova(m1,test = "Chi")

## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: fledgeSuccessFromHatchingBinom
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                              65     208.21              
## Treatment       1   5.0505        64     203.16 0.0246190 *  
## Year            1  10.9684        63     192.19 0.0009268 ***
## Treatment:Year  1   1.1163        62     191.07 0.2907064    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Plot the predictions from this model.

#predict

newData<-expand.grid(Treatment = c("Control","Heated"),Year = as.factor(c(2017,2018)),IncubDuration_Std = 0,DoY_Hatch_Std = 0)

#Predict
inverseFunction <-family(m1)$linkinv

preds <- predict(m1,newdata = newData,se.fit = TRUE)
newData <- newData %>% 
  mutate(fit_lp = preds$fit,se_lp = preds$se.fit) %>% 
  mutate(fit = inverseFunction(fit_lp),
         ymin = inverseFunction(fit_lp - (1.96*se_lp)), 
         ymax = inverseFunction(fit_lp + (1.96*se_lp)))

newData

##   Treatment Year IncubDuration_Std DoY_Hatch_Std   fit_lp     se_lp       fit
## 1   Control 2017                 0             0 1.699386 0.2637717 0.8454545
## 2    Heated 2017                 0             0 1.238374 0.2539542 0.7752809
## 3   Control 2018                 0             0 3.036554 0.4179310 0.9541985
## 4    Heated 2018                 0             0 1.937471 0.2594188 0.8740741
##        ymin      ymax
## 1 0.7653779 0.9017107
## 2 0.6771323 0.8501941
## 3 0.9018020 0.9792796
## 4 0.8067471 0.9202635

ggplot(newData,aes(x = Treatment, y = fit, ymin = ymin, ymax = ymax, colour = Year)) + 
  geom_point(position = position_dodge(width = 0.3)) + 
  geom_errorbar(width = 0.1,position = position_dodge(width = 0.3)) + 
  ylab("Fledging success")

I would use the same approach with the growth/size analysis.

Example analysis

Owen Jones