Delta Distance Modeling

All Bugs

Problems Noticing

  • qqplots of all models are not normal and the best fit model is the null model for all. Might need to retransform the distance_diff data either by standardizing it or performing another transformation. (NOTE: only did analyses for bugs that flew and had a distance > 0.)

Summary

  • (For all bugs) Mass difference between trials matter (gains mass then looses distance).
  • (For bugs that flew some distance) Distance or delta distance has nothing to do with the wing2body ratio. It rarely has anything to do with sex or mass or eggs laid. But from the flight_distance.Rmd script, it seems to be heavily dependent on time of day.

All Data

source_path = "~/Desktop/git_repositories/SBB-dispersal/avbernat_working_on/Rsrc/"

script_names = c("center_flight_data.R", # Re-centers data 
                 "clean_flight_data.R", # Loads and cleans data
                 "unique_flight_data.R",
                 "get_warnings.R", 
                 "compare_models.R",
                 "regression_output.R", # Cleans regression outputs; prints in color or B&W
                 "AICprobabilities.R")

for (script in script_names) { 
  path = paste0(source_path, script)
  source(path) 
}
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
output_col = FALSE # Change to TRUE if working in Base R or RStudio; FALSE if generating an HTML

data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
d <- create_delta_data(data_tested)
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

No mass_diff

d <- d %>%
  filter(!is.na(mass_diff))

R = d$dist_diff
A = d$host_c
B = d$sex_c
C = d$wing2body
D = d$mass_diff 
X = d$population 

data<-data.frame(R, A, B, C, D, X)

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 4-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]       [,2]       [,3]      
## AICs   5447.876   5448.226   5448.782  
## models 4          9          7         
## probs  0.08438142 0.07083455 0.05364102
## 
## m4   R ~ D + (1 | X)
## m9   R ~ B + D + (1 | X)
## m7   R ~ A + D + (1 | X)
anova(m0, m4, test="Chisq") # mass matters
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m4: R ~ D + (1 | X)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## m0    3 5450.7 5461.6 -2722.4   5444.7                       
## m4    4 5447.9 5462.4 -2719.9   5439.9 4.8538  1    0.02758 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m4, m9, test="Chisq")
## Data: data
## Models:
## m4: R ~ D + (1 | X)
## m9: R ~ B + D + (1 | X)
##    npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
## m4    4 5447.9 5462.4 -2719.9   5439.9                    
## m9    5 5448.2 5466.4 -2719.1   5438.2  1.65  1      0.199
anova(m4, m7, test="Chisq")
## Data: data
## Models:
## m4: R ~ D + (1 | X)
## m7: R ~ A + D + (1 | X)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## m4    4 5447.9 5462.4 -2719.9   5439.9                     
## m7    5 5448.8 5466.9 -2719.4   5438.8 1.0939  1     0.2956
dist_model <- lmer(dist_diff ~ mass_diff + (1 | population), data=d, REML=FALSE)
tidy_regression(dist_model, is_color=output_col)
## lmer dist_diff ~ mass_diff + (1 | population) d FALSE 
## AIC:  5447.876 5447.876 
## (Intercept)  coeff:  -483.7403092    tval:  -1.728892
## mass_diff    coeff:  -45487.1652921  tval:  -2.222876

If a bug gains mass then its distance decreases between trials.

s.test <- paste("pval: ", shapiro.test(residuals(dist_model))$p.value)
qqnorm(resid(dist_model))
qqline(resid(dist_model))
text(-1, 5000, s.test)

Females Only

d_fem <- d[d$sex == "F",] # very low sample size | also need to recenter

No mass_diff

R = d_fem$dist_diff
A = d_fem$egg_case
B = d_fem$wing2body
C = d_fem$mass_diff 
X = d_fem$population 

data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]    
## AICs   1805.96 
## models 18      
## probs  0.966614
## 
## m0   R ~ (1 | X)

Null model the best.

Males Only

d_male <- d[d$sex == "M",] # need to recenter

No mass_diff

R = d_male$dist_diff
A = d_male$host_c
B = d_male$wing2body
C = d_male$mass_diff 
X = d_male$population 

data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]     
## AICs   3620.796 
## models 18       
## probs  0.9977023
## 
## m0   R ~ (1 | X)

Null model the best.

Bugs That Flew Some Distance

### Remove everyone who didn't fly (then remove distances = 0, if that didn't work fully)
data_flew <- data_tested[data_tested$flew_b == 1, ]
data_flew <- data_flew[data_flew$distance > 0, ] # 6 bugs 
data_flew <- center_data(data_flew)
d <- create_delta_data(data_flew)
d$dist_diff_c = d$dist_diff - mean(d$dist_diff)
d$dist_diff_s =  (d$dist_diff - mean(d$dist_diff)) / sd(d$dist_diff)

Visualizing Distributions

par(mfrow=c(2,2))
hist(d$dist_diff, col=4)
hist(d$mass_diff, col=4)
hist(d$wing2body_c, col=4)
hist(d$sym_dist, col=4)

Delta Distance

**Testing mass_diff

## Test
tidy_regression(lm(dist_diff ~ sym_dist, data=d), is_color = output_col) # no effect of sym_dist
## lm dist_diff ~ sym_dist d 
## AIC:  2564.668 
## (Intercept)  coeff:  -153.3373638    Pr(>|t|):  0.7963755
## sym_dist     coeff:  -303.2871167    Pr(>|t|):  0.5595623

Removing sym dist and using wing2body.

No mass_diff

R = d$dist_diff
A = d$host_c
B = d$sex_c
C = d$wing2body
D = d$mass_diff 
X = d$population 

data<-data.frame(R, A, B, C, D, X)

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]     
## AICs   2550.871 
## models 18       
## probs  0.9976969
## 
## m0   R ~ (1 | X)
dist_model <- lmer(dist_diff ~ 1 + (1 | population), data=d, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(dist_model, is_color=output_col)
## lmer dist_diff ~ 1 + (1 | population) d FALSE 
## AIC:  2565.015 2565.015 
## 1    coeff:  -363.0690625    tval:  -0.7736726
s.test <- paste("pval: ", shapiro.test(residuals(dist_model))$p.value)
qqnorm(resid(dist_model))
qqline(resid(dist_model))
text(-1, 5000, s.test)

Mass_diff

d <- d %>%
  filter(!is.na(mass_diff))

R = d$dist_diff
A = d$host_c
B = d$sex_c
C = d$wing2body
D = d$mass_diff 
X = d$population 

data<-data.frame(R, A, B, C, D, X)

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 4-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]      [,2]      [,3]       [,4]       [,5]      
## AICs   2565.015  2565.905  2566.167   2566.83    2566.895  
## models 113       4         2          1          3         
## probs  0.1337176 0.0856842 0.07518488 0.05397731 0.05223007
## 
## m0   R ~ 1 + (1 | X)
## m4   R ~ D + (1 | X)
## m2   R ~ B + (1 | X)
## m1   R ~ A + (1 | X)
## m3   R ~ C + (1 | X)
anova(m0, m4, test="Chisq") # Adding D does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m4: R ~ D + (1 | X)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## m0    3 2565.0 2573.6 -1279.5   2559.0                     
## m4    4 2565.9 2577.3 -1279.0   2557.9 1.1099  1     0.2921

Null is the best model again.

Females Only

d_fem <- d[d$sex == "F",] # very low sample size | also need to recenter

No mass_diff

R = d_fem$dist_diff
A = d_fem$host_c
B = d_fem$wing2body
C = d_fem$mass_diff 
X = d_fem$population 

data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 2-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]     [,2]      [,3]      [,4]      [,5]      
## AICs   535.1551 536.3418  536.5875  537.3108  538.7764  
## models 5        2         1         3         4         
## probs  0.392933 0.2170809 0.1919932 0.1337281 0.06426471
## 
## m0   R ~ (1 | X)
## m2   R ~ B + (1 | X)
## m1   R ~ A + (1 | X)
## m3   R ~ A + B + (1 | X)
## m4   R ~ A * B + (1 | X)
anova(m0, m1, test="Chisq") # Adding A (or B) does not improve fit
## Data: data
## Models:
## m0: R ~ (1 | X)
## m1: R ~ A + (1 | X)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## m0    3 535.16 538.93 -264.58   529.16                     
## m1    4 536.59 541.62 -264.29   528.59 0.5676  1     0.4512

mass_diff

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]     
## AICs   519.0416 
## models 18       
## probs  0.9988957
## 
## m0   R ~ (1 | X)

Null is the best model for both again.

dist_model <- lmer(dist_diff ~ 1 + (1 | population), data=d_fem, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(dist_model, is_color=output_col)
## lmer dist_diff ~ 1 + (1 | population) d_fem FALSE 
## AIC:  535.1551 535.1551 
## 1    coeff:  -1217.8142308   tval:  -0.9770083
s.test <- paste("pval: ", shapiro.test(residuals(dist_model))$p.value)
qqnorm(resid(dist_model))
qqline(resid(dist_model))
text(-1, 5000, s.test)

egg_case

No mass_diff

R = d_fem$dist_diff
A = d_fem$egg_case
B = d_fem$wing2body
C = d_fem$mass_diff 
X = d_fem$population 

data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]     
## AICs   519.0416 
## models 18       
## probs  0.9989612
## 
## m0   R ~ (1 | X)

Null is the best fit again.

Males Only

d_male <- d[d$sex == "M",] # need to recenter

No mass_diff

R = d_male$dist_diff
A = d_male$host_c
B = d_male$wing2body
C = d_male$mass_diff 
X = d_male$population 

data<-data.frame(R, A, B, C, X)
model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 2-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]      [,2]      [,3]      [,4]     
## AICs   2032.329  2033.336  2034.057  2034.973 
## models 5         1         2         3        
## probs  0.4176423 0.2524608 0.1760721 0.1113632
## 
## m0   R ~ (1 | X)
## m1   R ~ A + (1 | X)
## m2   R ~ B + (1 | X)
## m3   R ~ A + B + (1 | X)
anova(m0, m1, test="Chisq") # Adding A (or B) does not improve fit
## Data: data
## Models:
## m0: R ~ (1 | X)
## m1: R ~ A + (1 | X)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## m0    3 2032.3 2040.2 -1013.2   2026.3                     
## m1    4 2033.3 2043.8 -1012.7   2025.3 0.9933  1     0.3189

mass_diff

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]     
## AICs   2018.083 
## models 18       
## probs  0.9978775
## 
## m0   R ~ (1 | X)

Null is the best model for both again.

dist_model <- lmer(dist_diff ~ 1 + (1 | population), data=d_male, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(dist_model, is_color=output_col)
## lmer dist_diff ~ 1 + (1 | population) d_male FALSE 
## AIC:  2032.329 2032.329 
## 1    coeff:  -145.1928431    tval:  -0.294199
s.test <- paste("pval: ", shapiro.test(residuals(dist_model))$p.value)
qqnorm(resid(dist_model))
qqline(resid(dist_model))
text(-1, 5000, s.test)