source_path = "~/Desktop/git_repositories/SBB-dispersal/avbernat_working_on/Rsrc/"
script_names = c("center_flight_data.R", # Re-centers data
"clean_flight_data.R", # Loads and cleans data
"unique_flight_data.R",
"get_warnings.R",
"compare_models.R",
"regression_output.R", # Cleans regression outputs; prints in color or B&W
"AICprobabilities.R")
for (script in script_names) {
path = paste0(source_path, script)
source(path)
}
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
output_col = FALSE # Change to TRUE if working in Base R or RStudio; FALSE if generating an HTML
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
d <- create_delta_data(data_tested)
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
No mass_diff
d <- d %>%
filter(!is.na(mass_diff))
R = d$dist_diff
A = d$host_c
B = d$sex_c
C = d$wing2body
D = d$mass_diff
X = d$population
data<-data.frame(R, A, B, C, D, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 4-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3]
## AICs 5447.876 5448.226 5448.782
## models 4 9 7
## probs 0.08438142 0.07083455 0.05364102
##
## m4 R ~ D + (1 | X)
## m9 R ~ B + D + (1 | X)
## m7 R ~ A + D + (1 | X)
anova(m0, m4, test="Chisq") # mass matters
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m4: R ~ D + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0 3 5450.7 5461.6 -2722.4 5444.7
## m4 4 5447.9 5462.4 -2719.9 5439.9 4.8538 1 0.02758 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m4, m9, test="Chisq")
## Data: data
## Models:
## m4: R ~ D + (1 | X)
## m9: R ~ B + D + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m4 4 5447.9 5462.4 -2719.9 5439.9
## m9 5 5448.2 5466.4 -2719.1 5438.2 1.65 1 0.199
anova(m4, m7, test="Chisq")
## Data: data
## Models:
## m4: R ~ D + (1 | X)
## m7: R ~ A + D + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m4 4 5447.9 5462.4 -2719.9 5439.9
## m7 5 5448.8 5466.9 -2719.4 5438.8 1.0939 1 0.2956
dist_model <- lmer(dist_diff ~ mass_diff + (1 | population), data=d, REML=FALSE)
tidy_regression(dist_model, is_color=output_col)
## lmer dist_diff ~ mass_diff + (1 | population) d FALSE
## AIC: 5447.876 5447.876
## (Intercept) coeff: -483.7403092 tval: -1.728892
## mass_diff coeff: -45487.1652921 tval: -2.222876
If a bug gains mass then its distance decreases between trials.
s.test <- paste("pval: ", shapiro.test(residuals(dist_model))$p.value)
qqnorm(resid(dist_model))
qqline(resid(dist_model))
text(-1, 5000, s.test)
d_fem <- d[d$sex == "F",] # very low sample size | also need to recenter
No mass_diff
R = d_fem$dist_diff
A = d_fem$egg_case
B = d_fem$wing2body
C = d_fem$mass_diff
X = d_fem$population
data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1]
## AICs 1805.96
## models 18
## probs 0.966614
##
## m0 R ~ (1 | X)
Null model the best.
d_male <- d[d$sex == "M",] # need to recenter
No mass_diff
R = d_male$dist_diff
A = d_male$host_c
B = d_male$wing2body
C = d_male$mass_diff
X = d_male$population
data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1]
## AICs 3620.796
## models 18
## probs 0.9977023
##
## m0 R ~ (1 | X)
Null model the best.
### Remove everyone who didn't fly (then remove distances = 0, if that didn't work fully)
data_flew <- data_tested[data_tested$flew_b == 1, ]
data_flew <- data_flew[data_flew$distance > 0, ] # 6 bugs
data_flew <- center_data(data_flew)
d <- create_delta_data(data_flew)
d$dist_diff_c = d$dist_diff - mean(d$dist_diff)
d$dist_diff_s = (d$dist_diff - mean(d$dist_diff)) / sd(d$dist_diff)
par(mfrow=c(2,2))
hist(d$dist_diff, col=4)
hist(d$mass_diff, col=4)
hist(d$wing2body_c, col=4)
hist(d$sym_dist, col=4)
**Testing mass_diff
## Test
tidy_regression(lm(dist_diff ~ sym_dist, data=d), is_color = output_col) # no effect of sym_dist
## lm dist_diff ~ sym_dist d
## AIC: 2564.668
## (Intercept) coeff: -153.3373638 Pr(>|t|): 0.7963755
## sym_dist coeff: -303.2871167 Pr(>|t|): 0.5595623
Removing sym dist and using wing2body.
No mass_diff
R = d$dist_diff
A = d$host_c
B = d$sex_c
C = d$wing2body
D = d$mass_diff
X = d$population
data<-data.frame(R, A, B, C, D, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1]
## AICs 2550.871
## models 18
## probs 0.9976969
##
## m0 R ~ (1 | X)
dist_model <- lmer(dist_diff ~ 1 + (1 | population), data=d, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(dist_model, is_color=output_col)
## lmer dist_diff ~ 1 + (1 | population) d FALSE
## AIC: 2565.015 2565.015
## 1 coeff: -363.0690625 tval: -0.7736726
s.test <- paste("pval: ", shapiro.test(residuals(dist_model))$p.value)
qqnorm(resid(dist_model))
qqline(resid(dist_model))
text(-1, 5000, s.test)
Mass_diff
d <- d %>%
filter(!is.na(mass_diff))
R = d$dist_diff
A = d$host_c
B = d$sex_c
C = d$wing2body
D = d$mass_diff
X = d$population
data<-data.frame(R, A, B, C, D, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 4-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4] [,5]
## AICs 2565.015 2565.905 2566.167 2566.83 2566.895
## models 113 4 2 1 3
## probs 0.1337176 0.0856842 0.07518488 0.05397731 0.05223007
##
## m0 R ~ 1 + (1 | X)
## m4 R ~ D + (1 | X)
## m2 R ~ B + (1 | X)
## m1 R ~ A + (1 | X)
## m3 R ~ C + (1 | X)
anova(m0, m4, test="Chisq") # Adding D does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m4: R ~ D + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0 3 2565.0 2573.6 -1279.5 2559.0
## m4 4 2565.9 2577.3 -1279.0 2557.9 1.1099 1 0.2921
Null is the best model again.
d_fem <- d[d$sex == "F",] # very low sample size | also need to recenter
No mass_diff
R = d_fem$dist_diff
A = d_fem$host_c
B = d_fem$wing2body
C = d_fem$mass_diff
X = d_fem$population
data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 2-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4] [,5]
## AICs 535.1551 536.3418 536.5875 537.3108 538.7764
## models 5 2 1 3 4
## probs 0.392933 0.2170809 0.1919932 0.1337281 0.06426471
##
## m0 R ~ (1 | X)
## m2 R ~ B + (1 | X)
## m1 R ~ A + (1 | X)
## m3 R ~ A + B + (1 | X)
## m4 R ~ A * B + (1 | X)
anova(m0, m1, test="Chisq") # Adding A (or B) does not improve fit
## Data: data
## Models:
## m0: R ~ (1 | X)
## m1: R ~ A + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0 3 535.16 538.93 -264.58 529.16
## m1 4 536.59 541.62 -264.29 528.59 0.5676 1 0.4512
mass_diff
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1]
## AICs 519.0416
## models 18
## probs 0.9988957
##
## m0 R ~ (1 | X)
Null is the best model for both again.
dist_model <- lmer(dist_diff ~ 1 + (1 | population), data=d_fem, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(dist_model, is_color=output_col)
## lmer dist_diff ~ 1 + (1 | population) d_fem FALSE
## AIC: 535.1551 535.1551
## 1 coeff: -1217.8142308 tval: -0.9770083
s.test <- paste("pval: ", shapiro.test(residuals(dist_model))$p.value)
qqnorm(resid(dist_model))
qqline(resid(dist_model))
text(-1, 5000, s.test)
egg_case
No mass_diff
R = d_fem$dist_diff
A = d_fem$egg_case
B = d_fem$wing2body
C = d_fem$mass_diff
X = d_fem$population
data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1]
## AICs 519.0416
## models 18
## probs 0.9989612
##
## m0 R ~ (1 | X)
Null is the best fit again.
d_male <- d[d$sex == "M",] # need to recenter
No mass_diff
R = d_male$dist_diff
A = d_male$host_c
B = d_male$wing2body
C = d_male$mass_diff
X = d_male$population
data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 2-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4]
## AICs 2032.329 2033.336 2034.057 2034.973
## models 5 1 2 3
## probs 0.4176423 0.2524608 0.1760721 0.1113632
##
## m0 R ~ (1 | X)
## m1 R ~ A + (1 | X)
## m2 R ~ B + (1 | X)
## m3 R ~ A + B + (1 | X)
anova(m0, m1, test="Chisq") # Adding A (or B) does not improve fit
## Data: data
## Models:
## m0: R ~ (1 | X)
## m1: R ~ A + (1 | X)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m0 3 2032.3 2040.2 -1013.2 2026.3
## m1 4 2033.3 2043.8 -1012.7 2025.3 0.9933 1 0.3189
mass_diff
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
## [,1]
## AICs 2018.083
## models 18
## probs 0.9978775
##
## m0 R ~ (1 | X)
Null is the best model for both again.
dist_model <- lmer(dist_diff ~ 1 + (1 | population), data=d_male, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(dist_model, is_color=output_col)
## lmer dist_diff ~ 1 + (1 | population) d_male FALSE
## AIC: 2032.329 2032.329
## 1 coeff: -145.1928431 tval: -0.294199
s.test <- paste("pval: ", shapiro.test(residuals(dist_model))$p.value)
qqnorm(resid(dist_model))
qqline(resid(dist_model))
text(-1, 5000, s.test)