Model selection function: Perform model selection on a specified full model (LM, LMM, GLM, GLMM) by calculating the AICs and producing a table of the best models within a given deltaAIC. It also gives the final/reduced model as the model, within those, that has the most parameters and lowest AIC. Model selection is done sequentially using function MuMIn::dredge.
model_selection <- function (model, delta = 2){
# Model selection
AICtable_full <- MuMIn::dredge(model)
top_models_index <- which(AICtable_full[,'delta'] <= delta)
AICtable <- AICtable_full[top_models_index,]
model_list <-MuMIn::get.models(AICtable, subset = delta <= delta)
final_model_index <- which.max(unlist(lapply(lapply(model_list, coefficients), length)))
final_model <- model_list[[final_model_index]]
return(list(AIC_table = AICtable,
final_model = final_model))
}
Import data:
#make_forage()
load(file = paste0(here::here('data'),'/data_forage.Rda'))
kable(head(data_forage))
## Warning in `[<-.data.frame`(`*tmp*`, , isn, value = structure(list(Litter =
## structure(c("3", : provided 9 variables to replace 8 variables
| Population | ID | Sex | Mom_ID | Litter | Treatment | Pred_Tutor | Pred | Tutor_pop | Birth.Day | Day | Date | Time | Water | Weight | Run | Latency | Attempts | Notes | Tutor | event | time2peck | Predator |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AH | AHF2_1G4G.3.3 | F | AHF2_1G4G | 3 | pred-/same | pred-/AH | pred- | AH | 2-Feb | 2 | 22-Mar | 15:47 | pred- | 0.133 | 1 | 16 | 17 | AH | 1 | 16 | pred- | |
| AH | AHF2_1G4G.3.3 | F | AHF2_1G4G | 3 | pred-/same | pred-/AH | pred- | AH | 2-Feb | 1 | 21-Mar | 15:06 | pred+ | 0.133 | 1 | 73 | 99 | AH | 1 | 73 | pred- | |
| AH | AHF2_1G4G.6.2 | F | AHF2_1G4G | 6 | pred-/same | pred-/AH | pred- | AH | 14-Aug | 1 | 29-Sep | 15:17 | pred- | 0.144 | 1 | 35 | 166 | first few in middle | AH | 1 | 35 | pred- |
| AH | AHF2_1G4G.6.2 | F | AHF2_1G4G | 6 | pred-/same | pred-/AH | pred- | AH | 14-Aug | 2 | 30-Sep | 15:06 | pred+ | 0.144 | 1 | 58 | 7 | AH | 1 | 58 | pred- | |
| AH | AHF2_1G4P.2.1 | F | AHF2_1G4P | 2 | pred-/same | pred-/AH | pred- | AH | 15-Jan | 1 | 2-Mar | 15:30 | pred+ | 0.128 | 1 | 39 | 55 | AH | 1 | 39 | pred- | |
| AH | AHF2_1G4P.2.1 | F | AHF2_1G4P | 2 | pred-/same | pred-/AH | pred- | AH | 15-Jan | 2 | 3-Mar | 16:04 | pred- | 0.128 | 1 | 310 | 72 | AH | 1 | 310 | pred- |
Ben Bolker (creater of LME4) recommends using glmmTMB instead of lme4::glmer.nb for negative binomial GLMMs. Nevertheless, he said that is smart to check model parameters with different implementations/algorithms to make sure the answers are consistent - he calls it the gold standard for addressing convergence warnings, so that is what we are going to do!
# data_f <- data_forage %>%
# filter(Sex == 'F') %>%
# na.omit
#
# library(lme4)
# m1 <- glmer.nb(Attempts ~ (Population + Pred + Tutor_pop + Water)^2 + (1|ID) + (1|Mom_ID), data = data_f, na.action = "na.fail", family = poisson(), control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=100000)))
# # Fails to converge, but outpute suggests that it could be false +
#
# library(glmmTMB)
# m2 <- glmmTMB(Attempts ~ (Population + Pred + Tutor_pop + Water)^2 + (1|ID) + (1|Mom_ID), na.action = "na.fail", family = 'nbinom2', data = data)
#
# library(ggplot2)
# library(dotwhisker)
# library(broom.mixed)
# dwplot(list(glmer.nb=model1,glmmTMB=model),effect="fixed") + theme_bw() +
# geom_vline(xintercept=0,lty=2)
Coefficients are similar, so either either output could be used. But let’s continue with the glmmTMB since Bolker recommends that…
data <- data_forage %>% filter(Sex == 'F') %>% na.omit
library(glmmTMB)
model_pecks <- glmmTMB(Attempts ~ (Population + Pred + Tutor_pop + Water)^2 + (1|ID) + (1|Mom_ID), na.action = "na.fail", family = 'nbinom2', data = data)
mp <- glmmTMB(Attempts ~ Population + Pred + Tutor_pop + Water + Population:Pred + Population:Water + Pred:Water + Tutor_pop:Water + (1|ID) + (1|Mom_ID), na.action = "na.fail", family = 'nbinom2', data = data)
a1 <- car::Anova(mp, type = 'III')
kable(a1)
| Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|
| (Intercept) | 218.770297 | 1 | 0.0000000 |
| Population | 16.126192 | 1 | 0.0000593 |
| Pred | 0.406119 | 1 | 0.5239460 |
| Tutor_pop | 4.176731 | 1 | 0.0409827 |
| Water | 4.815794 | 1 | 0.0282001 |
| Population:Pred | 1.438652 | 1 | 0.2303576 |
| Population:Water | 1.420574 | 1 | 0.2333090 |
| Pred:Water | 6.234552 | 1 | 0.0125281 |
| Tutor_pop:Water | 2.443472 | 1 | 0.1180148 |
kable(summary(mp)$coefficients)
|
|| || || || |
|| || || || |
data2 <- data_forage %>% filter(Sex == 'M') %>% na.omit
model_pecks2 <- glmmTMB(Attempts ~ (Population + Pred + Tutor_pop + Water)^2 + (1|ID) + (1|Mom_ID), na.action = "na.fail", family = 'nbinom2', data = data2)
mp2 <- glmmTMB(Attempts ~ Population + Tutor_pop + Water + Population:Water + Tutor_pop:Water + (1|ID) + (1|Mom_ID), na.action = "na.fail", family = 'nbinom2', data = data2)
a2 <- car::Anova(mp2, type = 'III')
kable(a2)
| Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|
| (Intercept) | 125.9727200 | 1 | 0.0000000 |
| Population | 2.4194959 | 1 | 0.1198335 |
| Tutor_pop | 0.8703434 | 1 | 0.3508602 |
| Water | 2.7989793 | 1 | 0.0943243 |
| Population:Water | 0.4179748 | 1 | 0.5179493 |
| Tutor_pop:Water | 7.2645051 | 1 | 0.0070331 |
kable(summary(mp2)$coefficients)
|
|| || || || |
|| || || || |
Note that it is important to remove NAs (with
na.omit) and set na.action = "na.fail" so that
the AIC works with models that have the exact same number of
datapoints.
Now we can use our home-made function
model_selection.
data <- data_forage %>% filter(Sex == 'F') %>% na.omit
library(coxme)
ml1 <- coxme(Surv(Latency, event = event) ~ (Population + Pred + Tutor_pop + Water)^2 + (1|ID) + (1|Mom_ID), data = data, na.action = 'na.fail')
models_ml1 <- model_selection(ml1)
## Registered S3 methods overwritten by 'MuMIn':
## method from
## formula.coxme coxme
## logLik.coxme coxme
## logLik.lmekin coxme
kable(models_ml1$AIC_table)
| Population | Pred | Tutor_pop | Water | Population:Pred | Population:Tutor_pop | Population:Water | Pred:Tutor_pop | Pred:Water | Tutor_pop:Water | df | logLik | AICc | delta | weight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6 | + | NA | + | NA | NA | NA | NA | NA | NA | NA | 60 | -1249.297 | 2651.709 | 0.000000 | 0.5375885 |
| 38 | + | NA | + | NA | NA | + | NA | NA | NA | NA | 61 | -1249.337 | 2653.392 | 1.683625 | 0.2316623 |
| 8 | + | + | + | NA | NA | NA | NA | NA | NA | NA | 61 | -1249.254 | 2653.400 | 1.691525 | 0.2307491 |
anova <- car::Anova(models_ml1$final_model, type = 'III')
summary(models_ml1$final_model)
## Cox mixed-effects model fit by maximum likelihood
## Data: data
## events, n = 262, 301
## Iterations= 15 80
## NULL Integrated Fitted
## Log-likelihood -1313.981 -1288.707 -1218.849
##
## Chisq df p AIC BIC
## Integrated loglik 50.55 5.00 1.0702e-09 40.55 22.71
## Penalized loglik 190.26 61.34 3.9968e-15 67.59 -151.29
##
## Model: Surv(Latency, event = event) ~ Population + Tutor_pop + (1 | ID) + (1 | Mom_ID) + Population:Tutor_pop
## Fixed coefficients
## coef exp(coef) se(coef) z p
## PopulationAL -0.91929447 0.3988003 0.2427049 -3.79 0.00015
## Tutor_popAL -0.34879156 0.7055402 0.2224602 -1.57 0.12000
## PopulationAL:Tutor_popAL -0.03916085 0.9615960 0.3366334 -0.12 0.91000
##
## Random effects
## Group Variable Std Dev Variance
## ID Intercept 0.63070921 0.39779411
## Mom_ID Intercept 0.14761264 0.02178949
kable(anova)
| Df | Chisq | Pr(>Chisq) | |
|---|---|---|---|
| Population | 1 | 14.3467042 | 0.0001520 |
| Tutor_pop | 1 | 2.4582569 | 0.1169085 |
| Population:Tutor_pop | 1 | 0.0135329 | 0.9073903 |
data2 <- data_forage %>% filter(Sex == 'M') %>% na.omit
ml2 <- coxme(Surv(Latency, event = event) ~ (Population + Pred + Tutor_pop + Water)^2 + (1|ID) + (1|Mom_ID), data = data2, na.action = 'na.fail')
models_ml2 <- model_selection(ml2)
kable(models_ml2$AIC_table)
| Population | Pred | Tutor_pop | Water | Population:Pred | Population:Tutor_pop | Population:Water | Pred:Tutor_pop | Pred:Water | Tutor_pop:Water | df | logLik | AICc | delta | weight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 28 | + | + | NA | + | + | NA | NA | NA | NA | NA | 4 | -1041.389 | 2091.063 | 0.0000000 | 0.2556563 |
| 92 | + | + | NA | + | + | NA | + | NA | NA | NA | 5 | -1040.654 | 2091.676 | 0.6127019 | 0.1881958 |
| 544 | + | + | + | + | + | NA | NA | NA | NA | + | 6 | -1039.616 | 2091.730 | 0.6668614 | 0.1831679 |
| 20 | + | + | NA | NA | + | NA | NA | NA | NA | NA | 3 | -1042.875 | 2091.965 | 0.9014543 | 0.1628952 |
| 608 | + | + | + | + | + | NA | + | NA | NA | + | 7 | -1039.069 | 2092.761 | 1.6977908 | 0.1093921 |
| 284 | + | + | NA | + | + | NA | NA | NA | + | NA | 5 | -1041.280 | 2092.927 | 1.8635246 | 0.1006926 |
anova2 <- car::Anova(models_ml2$final_model, type = 'III')
summary(models_ml2$final_model)
## Cox mixed-effects model fit by maximum likelihood
## Data: data2
## events, n = 217, 252
## Iterations= 7 37
## NULL Integrated Fitted
## Log-likelihood -1052.964 -1039.11 -1039.029
##
## Chisq df p AIC BIC
## Integrated loglik 27.71 9.00 0.00106710 9.71 -20.71
## Penalized loglik 27.87 7.08 0.00024696 13.72 -10.20
##
## Model: Surv(Latency, event = event) ~ Population + Pred + Tutor_pop + Water + (1 | ID) + (1 | Mom_ID) + Population:Pred + Population:Water + Tutor_pop:Water
## Fixed coefficients
## coef exp(coef) se(coef) z p
## PopulationAL -0.1256642 0.8819109 0.2263486 -0.56 0.5800
## Predpred+ 0.5551293 1.7421662 0.1918494 2.89 0.0038
## Tutor_popAL -0.2330738 0.7920951 0.1903984 -1.22 0.2200
## Waterpred+ -0.3343386 0.7158113 0.2408074 -1.39 0.1700
## PopulationAL:Predpred+ -0.5462639 0.5791094 0.2754232 -1.98 0.0470
## PopulationAL:Waterpred+ -0.2857480 0.7514520 0.2739933 -1.04 0.3000
## Tutor_popAL:Waterpred+ 0.4868620 1.6272021 0.2742041 1.78 0.0760
##
## Random effects
## Group Variable Std Dev Variance
## ID Intercept 1.899319e-02 3.607414e-04
## Mom_ID Intercept 4.456235e-03 1.985803e-05
kable(anova2)
| Df | Chisq | Pr(>Chisq) | |
|---|---|---|---|
| Population | 1 | 0.3082247 | 0.5787716 |
| Pred | 1 | 8.3727392 | 0.0038089 |
| Tutor_pop | 1 | 1.4985114 | 0.2209005 |
| Water | 1 | 1.9276731 | 0.1650132 |
| Population:Pred | 1 | 3.9337263 | 0.0473270 |
| Population:Water | 1 | 1.0876433 | 0.2969940 |
| Tutor_pop:Water | 1 | 3.1525630 | 0.0758078 |
Male Latency
library(emmeans)
ML <- coxph(models_ml2$final_model$formulaList$fixed, data = data2, na.action = 'na.fail')
emmML1 <- emmeans(ML, specs = pairwise ~ Population:Pred)
emmML2 <- emmeans(ML, specs = pairwise ~ Population|Pred)
emmML3 <- emmeans(ML, specs = pairwise ~ Pred|Population)
emmML1$emmeans
## Population Pred emmean SE df asymp.LCL asymp.UCL
## AH pred- -0.162 0.138 Inf -0.4319 0.1080
## AL pred- -0.430 0.234 Inf -0.8889 0.0283
## AH pred+ 0.393 0.233 Inf -0.0637 0.8499
## AL pred+ -0.422 0.249 Inf -0.9092 0.0660
##
## Results are averaged over the levels of: Tutor_pop, Water
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
emmML2$emmeans
## Pred = pred-:
## Population emmean SE df asymp.LCL asymp.UCL
## AH -0.162 0.138 Inf -0.4319 0.1080
## AL -0.430 0.234 Inf -0.8889 0.0283
##
## Pred = pred+:
## Population emmean SE df asymp.LCL asymp.UCL
## AH 0.393 0.233 Inf -0.0637 0.8499
## AL -0.422 0.249 Inf -0.9092 0.0660
##
## Results are averaged over the levels of: Tutor_pop, Water
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
emmML3$emmeans
## Population = AH:
## Pred emmean SE df asymp.LCL asymp.UCL
## pred- -0.162 0.138 Inf -0.4319 0.1080
## pred+ 0.393 0.233 Inf -0.0637 0.8499
##
## Population = AL:
## Pred emmean SE df asymp.LCL asymp.UCL
## pred- -0.430 0.234 Inf -0.8889 0.0283
## pred+ -0.422 0.249 Inf -0.9092 0.0660
##
## Results are averaged over the levels of: Tutor_pop, Water
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
Male Foraging Attempts:
emmMF1 <- emmeans(model_pecks2, specs = pairwise~Tutor_pop:Water)
emmMF2 <- emmeans(model_pecks2, specs = pairwise~Tutor_pop|Water)
emmMF3 <- emmeans(model_pecks2, specs = pairwise~Water|Tutor_pop)
emmMF1$emmeans
## Tutor_pop Water emmean SE df lower.CL upper.CL
## AH pred- 2.73 0.202 238 2.33 3.13
## AL pred- 2.49 0.208 238 2.08 2.90
## AH pred+ 2.11 0.212 238 1.69 2.53
## AL pred+ 2.77 0.209 238 2.35 3.18
##
## Results are averaged over the levels of: Population, Pred
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
emmMF2$emmeans
## Water = pred-:
## Tutor_pop emmean SE df lower.CL upper.CL
## AH 2.73 0.202 238 2.33 3.13
## AL 2.49 0.208 238 2.08 2.90
##
## Water = pred+:
## Tutor_pop emmean SE df lower.CL upper.CL
## AH 2.11 0.212 238 1.69 2.53
## AL 2.77 0.209 238 2.35 3.18
##
## Results are averaged over the levels of: Population, Pred
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
emmMF3$emmeans
## Tutor_pop = AH:
## Water emmean SE df lower.CL upper.CL
## pred- 2.73 0.202 238 2.33 3.13
## pred+ 2.11 0.212 238 1.69 2.53
##
## Tutor_pop = AL:
## Water emmean SE df lower.CL upper.CL
## pred- 2.49 0.208 238 2.08 2.90
## pred+ 2.77 0.209 238 2.35 3.18
##
## Results are averaged over the levels of: Population, Pred
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
Female Foraging Attempts:
emmFF1 <- emmeans(model_pecks, specs = pairwise~Water:Pred)
emmFF2 <- emmeans(model_pecks, specs = pairwise~Water|Pred)
emmFF3 <- emmeans(model_pecks, specs = pairwise~Pred|Water)
emmFF1$emmeans
## Water Pred emmean SE df lower.CL upper.CL
## pred- pred- 2.98 0.178 287 2.63 3.33
## pred+ pred- 2.38 0.194 287 1.99 2.76
## pred- pred+ 3.02 0.191 287 2.64 3.40
## pred+ pred+ 3.21 0.182 287 2.85 3.57
##
## Results are averaged over the levels of: Population, Tutor_pop
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
emmFF2$emmeans
## Pred = pred-:
## Water emmean SE df lower.CL upper.CL
## pred- 2.98 0.178 287 2.63 3.33
## pred+ 2.38 0.194 287 1.99 2.76
##
## Pred = pred+:
## Water emmean SE df lower.CL upper.CL
## pred- 3.02 0.191 287 2.64 3.40
## pred+ 3.21 0.182 287 2.85 3.57
##
## Results are averaged over the levels of: Population, Tutor_pop
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
emmFF3$emmeans
## Water = pred-:
## Pred emmean SE df lower.CL upper.CL
## pred- 2.98 0.178 287 2.63 3.33
## pred+ 3.02 0.191 287 2.64 3.40
##
## Water = pred+:
## Pred emmean SE df lower.CL upper.CL
## pred- 2.38 0.194 287 1.99 2.76
## pred+ 3.21 0.182 287 2.85 3.57
##
## Results are averaged over the levels of: Population, Tutor_pop
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
Only population origin had a significant effect on female latency to forage. However, tutor population and the interaction between population and tutor population were included in the ‘best’ model.
Option 1
Option 2 Currently in manuscript version
Option 3
Option 4
Predation and there interaction between Predation and Population origin had a significant effect on male latency to forage.
Option 1 Currently in manuscript version
Option 2
Option 3
Option 4
Option 5
Option 6
This includes all the variables for latency that are in the model, but
it is just so hard to comprehend, that I think we go with the simplified
version above that shows the significant relationships
Population, Tutor Population, and Test Water, and the interaction between Rearing Water and Test Water had significant effects on female foraging attempts.
Option 1 Currently in manuscript version
Option 2
Option 3
Only the interaction between Tutor Population and Test Water had significant effects on male foraging attempts.
Option 1 Currently in manuscript version