source_path = "~/Desktop/git_repositories/SBB-dispersal/avbernat_working_on/Rsrc/"
script_names = c("center_flight_data.R", # Re-centers data
"clean_flight_data.R", # Loads and cleans data
"unique_flight_data.R",
"get_warnings.R",
"compare_models.R",
"regression_output.R", # Cleans regression outputs; prints in color or B&W
"AICprobabilities.R",
"multinom_functions.R")
for (script in script_names) {
path = paste0(source_path, script)
source(path)
}
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
output_col = FALSE # Change to TRUE if working in Base R or RStudio; FALSE if generating an HTML
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
d <- create_delta_data(data_tested)
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Strong effect of mass
tidy_regression(lm(speed_diff ~ mass_diff, data=d), is_color = output_col)
## lm speed_diff ~ mass_diff d
## AIC: -151.3224
## (Intercept) coeff: -0.0344167 Pr(>|t|): 0.002531144 *
## mass_diff coeff: -3.5049071 Pr(>|t|): 7.576785e-05 *
hist(d$speed_diff) # much more normally distributed data and later will see the residuals pass the Shapiro-Wilk test
No mass_diff
R = d$speed_diff
A = d$host_c
B = d$sex_c
C = d$sym_dist
D = d$mass_diff
X = d$population
data<-data.frame(R, A, B, C, D, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs -144.3183 -144.2522 -143.17 -143.1047 -142.88 -142.8391
## models 2 6 4 10 11 8
## probs 0.1771685 0.1714097 0.09978166 0.0965746 0.08631205 0.08456401
## [,7] [,8]
## AICs -142.2522 -142.1902
## models 7 14
## probs 0.06305828 0.06113514
##
## m2 R ~ B + (1 | X)
## m6 R ~ B + C + (1 | X)
## m4 R ~ A + B + (1 | X)
## m10 R ~ B * C + (1 | X)
## m11 R ~ A * B + C + (1 | X)
## m8 R ~ A * B + (1 | X)
## m7 R ~ A + B + C + (1 | X)
## m14 R ~ A * B + A * C + (1 | X)
anova(m0, m2, test="Chisq") # Adding host matters
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## m0: R ~ (1 | X)
## m2: R ~ B + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0 3 -137.26 -126.35 71.631 -143.26
## m2 4 -144.32 -129.76 76.159 -152.32 9.0559 1 0.002619 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m2, m6, test="Chisq") # Adding sym_dist does not improve fit
## Data: data
## Models:
## m2: R ~ B + (1 | X)
## m6: R ~ B + C + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m2 4 -144.32 -129.76 76.159 -152.32
## m6 5 -144.25 -126.06 77.126 -154.25 1.9339 1 0.1643
speed_model <- lmer(speed_diff ~ host_c + (1 | population), data=d, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(speed_model, is_color=output_col)
## lmer speed_diff ~ host_c + (1 | population) d FALSE
## AIC: -136.2633 -136.2633
## (Intercept) coeff: -0.053711 tval: -4.120057
## host_c coeff: -0.0130537 tval: -1.001323
s.test <- paste("pval: ", shapiro.test(residuals(speed_model))$p.value)
qqnorm(resid(speed_model))
qqline(resid(speed_model))
text(-1, 0.1, s.test)
Mass_diff
data <- data %>%
filter(!is.na(D))
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 4-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1] [,2]
## AICs -154.2555 -154.2304
## models 9 14
## probs 0.06599623 0.06517267
##
## m9 R ~ B + D + (1 | X)
## m14 R ~ B + C + D + (1 | X)
anova(m9, m14, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m9: R ~ B + D + (1 | X)
## m14: R ~ B + C + D + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m9 5 -154.26 -136.12 82.128 -164.26
## m14 6 -154.23 -132.47 83.115 -166.23 1.9749 1 0.1599
speed_model <- lmer(speed_diff ~ host_c + mass_diff + (1 | population), data=d, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(speed_model, is_color=output_col)
## lmer speed_diff ~ host_c + mass_diff + (1 | population) d FALSE
## AIC: -148.6442 -148.6442
## (Intercept) coeff: -0.0418829 tval: -3.230437
## host_c coeff: -0.0146987 tval: -1.151097
## mass_diff coeff: -3.5532517 tval: -4.093025
Strong negative effect of mass_diff where a gain in mass leads to slower speeds.
wing2body
d <- d %>%
filter(!is.na(mass_diff))
R = d$speed_diff
A = d$host_c
B = d$sex_c
C = d$wing2body_s
D = d$mass_diff
X = d$population
data<-data.frame(R, A, B, C, D, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 4-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1]
## AICs -154.2555
## models 9
## probs 0.07515606
##
## m9 R ~ B + D + (1 | X)
Same top model and wing2body makes no difference.
So what if it was only females, since they had the greatest mass changes?
d_fem <- d[d$sex == "F",] # low sample size
No mass_diff
R = d_fem$speed_diff
A = d_fem$host_c
B = d_fem$sym_dist
C = d_fem$mass_diff
X = d_fem$population
data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 2-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4] [,5]
## AICs -39.07379 -38.87856 -38.73697 -37.58076 -37.08753
## models 2 1 5 3 4
## probs 0.278053 0.2521934 0.2349571 0.1318013 0.1029951
##
## m2 R ~ B + (1 | X)
## m1 R ~ A + (1 | X)
## m0 R ~ (1 | X)
## m3 R ~ A + B + (1 | X)
## m4 R ~ A * B + (1 | X)
anova(m0, m1, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m0: R ~ (1 | X)
## m1: R ~ A + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0 3 -38.737 -31.139 22.369 -44.737
## m1 4 -38.879 -28.748 23.439 -46.879 2.1416 1 0.1434
anova(m0, m2, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m0: R ~ (1 | X)
## m2: R ~ B + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0 3 -38.737 -31.139 22.369 -44.737
## m2 4 -39.074 -28.943 23.537 -47.074 2.3368 1 0.1263
speed_model <- lmer(speed_diff ~ 1 + (1 | population), data=d_fem, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(speed_model, is_color=output_col)
## lmer speed_diff ~ 1 + (1 | population) d_fem FALSE
## AIC: -38.73697 -38.73697
## 1 coeff: -0.0892473 tval: -4.524086
mass_diff
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs -51.83097 -51.58628 -50.76808 -50.49779 -50.13392 -49.72637
## models 5 9 6 3 7 11
## probs 0.1917199 0.1696414 0.1126845 0.09843993 0.08206484 0.06693566
## [,7] [,8]
## AICs -49.72453 -49.28154
## models 12 14
## probs 0.06687436 0.05358764
##
## m5 R ~ A + C + (1 | X)
## m9 R ~ A * C + (1 | X)
## m6 R ~ B + C + (1 | X)
## m3 R ~ C + (1 | X)
## m7 R ~ A + B + C + (1 | X)
## m11 R ~ A * B + C + (1 | X)
## m12 R ~ A * C + B + (1 | X)
## m14 R ~ A * B + A * C + (1 | X)
anova(m0, m1, test="Chisq") # strange. get error "refitting model(s) with ML (instead of REML)" even though these have been refit to REML=FALSE...
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## m0: R ~ (1 | X)
## m1: R ~ A + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0 3 -38.737 -31.139 22.369 -44.737
## m1 4 -38.879 -28.748 23.439 -46.879 2.1416 1 0.1434
anova(m0, m3, test="Chisq") # Adding C does improve fit
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## m0: R ~ (1 | X)
## m3: R ~ C + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0 3 -38.737 -31.139 22.369 -44.737
## m3 4 -50.498 -40.367 29.249 -58.498 13.761 1 0.0002076 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m0, m2, test="Chisq") # Adding B does not improve fit
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## m0: R ~ (1 | X)
## m2: R ~ B + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0 3 -38.737 -31.139 22.369 -44.737
## m2 4 -39.074 -28.943 23.537 -47.074 2.3368 1 0.1263
speed_model <- lmer(speed_diff ~ mass_diff + (1 | population), data=d_fem, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(speed_model, is_color=output_col)
## lmer speed_diff ~ mass_diff + (1 | population) d_fem FALSE
## AIC: -50.49779 -50.49779
## (Intercept) coeff: -0.0749062 tval: -4.006749
## mass_diff coeff: -3.4552535 tval: -3.851106
Strong negative effect of mass.
s.test <- paste("pval: ", shapiro.test(residuals(speed_model))$p.value)
qqnorm(resid(speed_model))
qqline(resid(speed_model))
text(-1, 0.1, s.test)
wing2body
R = d_fem$speed_diff
A = d_fem$host_c
B = d_fem$wing2body_s
C = d_fem$mass_diff
X = d_fem$population
data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs -51.83097 -51.58628 -50.49779 -49.91554 -49.7924 -49.12472 -49.03249
## models 5 9 3 7 12 6 13
## probs 0.2282522 0.2019667 0.1171977 0.08759596 0.0823654 0.05898737 0.05632892
##
## m5 R ~ A + C + (1 | X)
## m9 R ~ A * C + (1 | X)
## m3 R ~ C + (1 | X)
## m7 R ~ A + B + C + (1 | X)
## m12 R ~ A * C + B + (1 | X)
## m6 R ~ B + C + (1 | X)
## m13 R ~ B * C + A + (1 | X)
anova(m3, m5, test="Chisq") # Adding C marginally improves fit
## Data: data
## Models:
## m3: R ~ C + (1 | X)
## m5: R ~ A + C + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m3 4 -50.498 -40.367 29.249 -58.498
## m5 5 -51.831 -39.168 30.916 -61.831 3.3332 1 0.0679 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m5, m9, test="Chisq") # Adding A*C does not improve fit
## Data: data
## Models:
## m5: R ~ A + C + (1 | X)
## m9: R ~ A * C + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m5 5 -51.831 -39.168 30.915 -61.831
## m9 6 -51.586 -36.391 31.793 -63.586 1.7553 1 0.1852
anova(m3, m6, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m3: R ~ C + (1 | X)
## m6: R ~ B + C + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m3 4 -50.498 -40.367 29.249 -58.498
## m6 5 -49.125 -36.462 29.562 -59.125 0.6269 1 0.4285
speed_model <- lmer(speed_diff ~ host_c + mass_diff + (1 | population), data=d_fem, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(speed_model, is_color=output_col)
## lmer speed_diff ~ host_c + mass_diff + (1 | population) d_fem FALSE
## AIC: -51.83097 -51.83097
## (Intercept) coeff: -0.0928511 tval: -4.466844
## host_c coeff: -0.037953 tval: -1.842181
## mass_diff coeff: -3.5562885 tval: -4.027595
Strong negative effect of mass. Marginal negative effect of host.
s.test <- paste("pval: ", shapiro.test(residuals(speed_model))$p.value)
qqnorm(resid(speed_model))
qqline(resid(speed_model))
text(-1, 0.1, s.test)
eggs
R = d_fem$speed_diff
A = d_fem$host_c
B = d_fem$egg_case
C = d_fem$mass_diff
X = d_fem$population
data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## [,1] [,2] [,3] [,4] [,5]
## AICs -51.83097 -51.58628 -50.49779 -49.83539 -49.58877
## models 5 9 3 7 12
## probs 0.2485817 0.2199551 0.127636 0.09165015 0.08101814
##
## m5 R ~ A + C + (1 | X)
## m9 R ~ A * C + (1 | X)
## m3 R ~ C + (1 | X)
## m7 R ~ A + B + C + (1 | X)
## m12 R ~ A * C + B + (1 | X)
anova(m5, m7, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m5: R ~ A + C + (1 | X)
## m7: R ~ A + B + C + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m5 5 -51.831 -39.168 30.916 -61.831
## m7 6 -49.835 -34.640 30.918 -61.835 0.0044 1 0.947
anova(m5, m9, test="Chisq")
## Data: data
## Models:
## m5: R ~ A + C + (1 | X)
## m9: R ~ A * C + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m5 5 -51.831 -39.168 30.915 -61.831
## m9 6 -51.586 -36.391 31.793 -63.586 1.7553 1 0.1852
Egg case does not matter.
plot(d$mass_diff, d$speed_diff,
xlab="Changes in Mass between T1 and T2 (g)",
ylab="Changes in Speed between T1 and T2 (m/s)")