Problems Noticing

Delta Speed

source_path = "~/Desktop/git_repositories/SBB-dispersal/avbernat_working_on/Rsrc/"

script_names = c("center_flight_data.R", # Re-centers data 
                 "clean_flight_data.R", # Loads and cleans data
                 "unique_flight_data.R",
                 "get_warnings.R", 
                 "compare_models.R",
                 "regression_output.R", # Cleans regression outputs; prints in color or B&W
                 "AICprobabilities.R",
                 "multinom_functions.R")

for (script in script_names) { 
  path = paste0(source_path, script)
  source(path) 
}
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
output_col = FALSE # Change to TRUE if working in Base R or RStudio; FALSE if generating an HTML

data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
d <- create_delta_data(data_tested)
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Speed Diff ~ Mass Diff

## Strong effect of mass
tidy_regression(lm(speed_diff ~ mass_diff, data=d), is_color = output_col) 
## lm speed_diff ~ mass_diff d 
## AIC:  -151.3224 
## (Intercept)  coeff:  -0.0344167  Pr(>|t|):  0.002531144 *
## mass_diff    coeff:  -3.5049071  Pr(>|t|):  7.576785e-05 *
hist(d$speed_diff) # much more normally distributed data and later will see the residuals pass the Shapiro-Wilk test

No mass_diff

R = d$speed_diff
A = d$host_c
B = d$sex_c
C = d$sym_dist
D = d$mass_diff 
X = d$population 

data<-data.frame(R, A, B, C, D, X)

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]      [,2]      [,3]       [,4]      [,5]       [,6]      
## AICs   -144.3183 -144.2522 -143.17    -143.1047 -142.88    -142.8391 
## models 2         6         4          10        11         8         
## probs  0.1771685 0.1714097 0.09978166 0.0965746 0.08631205 0.08456401
##        [,7]       [,8]      
## AICs   -142.2522  -142.1902 
## models 7          14        
## probs  0.06305828 0.06113514
## 
## m2   R ~ B + (1 | X)
## m6   R ~ B + C + (1 | X)
## m4   R ~ A + B + (1 | X)
## m10  R ~ B * C + (1 | X)
## m11  R ~ A * B + C + (1 | X)
## m8   R ~ A * B + (1 | X)
## m7   R ~ A + B + C + (1 | X)
## m14  R ~ A * B + A * C + (1 | X)
anova(m0, m2, test="Chisq") # Adding host matters
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## m0: R ~ (1 | X)
## m2: R ~ B + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)   
## m0    3 -137.26 -126.35 71.631  -143.26                        
## m2    4 -144.32 -129.76 76.159  -152.32 9.0559  1   0.002619 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m2, m6, test="Chisq") # Adding sym_dist does not improve fit
## Data: data
## Models:
## m2: R ~ B + (1 | X)
## m6: R ~ B + C + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)
## m2    4 -144.32 -129.76 76.159  -152.32                     
## m6    5 -144.25 -126.06 77.126  -154.25 1.9339  1     0.1643
speed_model <- lmer(speed_diff ~ host_c + (1 | population), data=d, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(speed_model, is_color=output_col)
## lmer speed_diff ~ host_c + (1 | population) d FALSE 
## AIC:  -136.2633 -136.2633 
## (Intercept)  coeff:  -0.053711   tval:  -4.120057
## host_c       coeff:  -0.0130537  tval:  -1.001323
  • no effect of host
s.test <- paste("pval: ", shapiro.test(residuals(speed_model))$p.value)
qqnorm(resid(speed_model))
qqline(resid(speed_model))
text(-1, 0.1, s.test)

Mass_diff

data <- data %>%
  filter(!is.na(D))

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 4-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]       [,2]      
## AICs   -154.2555  -154.2304 
## models 9          14        
## probs  0.06599623 0.06517267
## 
## m9   R ~ B + D + (1 | X)
## m14  R ~ B + C + D + (1 | X)
anova(m9, m14, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m9: R ~ B + D + (1 | X)
## m14: R ~ B + C + D + (1 | X)
##     npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)
## m9     5 -154.26 -136.12 82.128  -164.26                     
## m14    6 -154.23 -132.47 83.115  -166.23 1.9749  1     0.1599
speed_model <- lmer(speed_diff ~ host_c + mass_diff + (1 | population), data=d, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(speed_model, is_color=output_col)
## lmer speed_diff ~ host_c + mass_diff + (1 | population) d FALSE 
## AIC:  -148.6442 -148.6442 
## (Intercept)  coeff:  -0.0418829  tval:  -3.230437
## host_c       coeff:  -0.0146987  tval:  -1.151097
## mass_diff    coeff:  -3.5532517  tval:  -4.093025

Strong negative effect of mass_diff where a gain in mass leads to slower speeds.

wing2body

d <- d %>%
  filter(!is.na(mass_diff))

R = d$speed_diff
A = d$host_c
B = d$sex_c
C = d$wing2body_s
D = d$mass_diff 
X = d$population 

data<-data.frame(R, A, B, C, D, X)

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 4-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]      
## AICs   -154.2555 
## models 9         
## probs  0.07515606
## 
## m9   R ~ B + D + (1 | X)

Same top model and wing2body makes no difference.

So what if it was only females, since they had the greatest mass changes?

Delta Speed - Females Only

d_fem <- d[d$sex == "F",] # low sample size 

No mass_diff

R = d_fem$speed_diff
A = d_fem$host_c
B = d_fem$sym_dist
C = d_fem$mass_diff 
X = d_fem$population 

data<-data.frame(R, A, B, C, X)

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 2-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]      [,2]      [,3]      [,4]      [,5]     
## AICs   -39.07379 -38.87856 -38.73697 -37.58076 -37.08753
## models 2         1         5         3         4        
## probs  0.278053  0.2521934 0.2349571 0.1318013 0.1029951
## 
## m2   R ~ B + (1 | X)
## m1   R ~ A + (1 | X)
## m0   R ~ (1 | X)
## m3   R ~ A + B + (1 | X)
## m4   R ~ A * B + (1 | X)
anova(m0, m1, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m0: R ~ (1 | X)
## m1: R ~ A + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)
## m0    3 -38.737 -31.139 22.369  -44.737                     
## m1    4 -38.879 -28.748 23.439  -46.879 2.1416  1     0.1434
anova(m0, m2, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m0: R ~ (1 | X)
## m2: R ~ B + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)
## m0    3 -38.737 -31.139 22.369  -44.737                     
## m2    4 -39.074 -28.943 23.537  -47.074 2.3368  1     0.1263
speed_model <- lmer(speed_diff ~ 1 + (1 | population), data=d_fem, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(speed_model, is_color=output_col)
## lmer speed_diff ~ 1 + (1 | population) d_fem FALSE 
## AIC:  -38.73697 -38.73697 
## 1    coeff:  -0.0892473  tval:  -4.524086

mass_diff

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]      [,2]      [,3]      [,4]       [,5]       [,6]      
## AICs   -51.83097 -51.58628 -50.76808 -50.49779  -50.13392  -49.72637 
## models 5         9         6         3          7          11        
## probs  0.1917199 0.1696414 0.1126845 0.09843993 0.08206484 0.06693566
##        [,7]       [,8]      
## AICs   -49.72453  -49.28154 
## models 12         14        
## probs  0.06687436 0.05358764
## 
## m5   R ~ A + C + (1 | X)
## m9   R ~ A * C + (1 | X)
## m6   R ~ B + C + (1 | X)
## m3   R ~ C + (1 | X)
## m7   R ~ A + B + C + (1 | X)
## m11  R ~ A * B + C + (1 | X)
## m12  R ~ A * C + B + (1 | X)
## m14  R ~ A * B + A * C + (1 | X)
anova(m0, m1, test="Chisq") # strange. get error "refitting model(s) with ML (instead of REML)" even though these have been refit to REML=FALSE...
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## m0: R ~ (1 | X)
## m1: R ~ A + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)
## m0    3 -38.737 -31.139 22.369  -44.737                     
## m1    4 -38.879 -28.748 23.439  -46.879 2.1416  1     0.1434
anova(m0, m3, test="Chisq") # Adding C does improve fit
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## m0: R ~ (1 | X)
## m3: R ~ C + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)    
## m0    3 -38.737 -31.139 22.369  -44.737                         
## m3    4 -50.498 -40.367 29.249  -58.498 13.761  1  0.0002076 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m0, m2, test="Chisq") # Adding B does not improve fit
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## m0: R ~ (1 | X)
## m2: R ~ B + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)
## m0    3 -38.737 -31.139 22.369  -44.737                     
## m2    4 -39.074 -28.943 23.537  -47.074 2.3368  1     0.1263
speed_model <- lmer(speed_diff ~ mass_diff + (1 | population), data=d_fem, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(speed_model, is_color=output_col)
## lmer speed_diff ~ mass_diff + (1 | population) d_fem FALSE 
## AIC:  -50.49779 -50.49779 
## (Intercept)  coeff:  -0.0749062  tval:  -4.006749
## mass_diff    coeff:  -3.4552535  tval:  -3.851106

Strong negative effect of mass.

s.test <- paste("pval: ", shapiro.test(residuals(speed_model))$p.value)
qqnorm(resid(speed_model))
qqline(resid(speed_model))
text(-1, 0.1, s.test)

wing2body

R = d_fem$speed_diff
A = d_fem$host_c
B = d_fem$wing2body_s
C = d_fem$mass_diff 
X = d_fem$population 

data<-data.frame(R, A, B, C, X)

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
##        [,1]      [,2]      [,3]      [,4]       [,5]      [,6]       [,7]      
## AICs   -51.83097 -51.58628 -50.49779 -49.91554  -49.7924  -49.12472  -49.03249 
## models 5         9         3         7          12        6          13        
## probs  0.2282522 0.2019667 0.1171977 0.08759596 0.0823654 0.05898737 0.05632892
## 
## m5   R ~ A + C + (1 | X)
## m9   R ~ A * C + (1 | X)
## m3   R ~ C + (1 | X)
## m7   R ~ A + B + C + (1 | X)
## m12  R ~ A * C + B + (1 | X)
## m6   R ~ B + C + (1 | X)
## m13  R ~ B * C + A + (1 | X)
anova(m3, m5, test="Chisq") # Adding C marginally improves fit
## Data: data
## Models:
## m3: R ~ C + (1 | X)
## m5: R ~ A + C + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)  
## m3    4 -50.498 -40.367 29.249  -58.498                       
## m5    5 -51.831 -39.168 30.916  -61.831 3.3332  1     0.0679 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m5, m9, test="Chisq") # Adding A*C does not improve fit
## Data: data
## Models:
## m5: R ~ A + C + (1 | X)
## m9: R ~ A * C + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)
## m5    5 -51.831 -39.168 30.915  -61.831                     
## m9    6 -51.586 -36.391 31.793  -63.586 1.7553  1     0.1852
anova(m3, m6, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m3: R ~ C + (1 | X)
## m6: R ~ B + C + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)
## m3    4 -50.498 -40.367 29.249  -58.498                     
## m6    5 -49.125 -36.462 29.562  -59.125 0.6269  1     0.4285
speed_model <- lmer(speed_diff ~ host_c + mass_diff + (1 | population), data=d_fem, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(speed_model, is_color=output_col)
## lmer speed_diff ~ host_c + mass_diff + (1 | population) d_fem FALSE 
## AIC:  -51.83097 -51.83097 
## (Intercept)  coeff:  -0.0928511  tval:  -4.466844
## host_c       coeff:  -0.037953   tval:  -1.842181
## mass_diff    coeff:  -3.5562885  tval:  -4.027595

Strong negative effect of mass. Marginal negative effect of host.

s.test <- paste("pval: ", shapiro.test(residuals(speed_model))$p.value)
qqnorm(resid(speed_model))
qqline(resid(speed_model))
text(-1, 0.1, s.test)

eggs

R = d_fem$speed_diff
A = d_fem$host_c
B = d_fem$egg_case
C = d_fem$mass_diff 
X = d_fem$population 

data<-data.frame(R, A, B, C, X)

model_script = paste0(source_path,"generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
model_comparisonsAIC(model_script)
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
##        [,1]      [,2]      [,3]      [,4]       [,5]      
## AICs   -51.83097 -51.58628 -50.49779 -49.83539  -49.58877 
## models 5         9         3         7          12        
## probs  0.2485817 0.2199551 0.127636  0.09165015 0.08101814
## 
## m5   R ~ A + C + (1 | X)
## m9   R ~ A * C + (1 | X)
## m3   R ~ C + (1 | X)
## m7   R ~ A + B + C + (1 | X)
## m12  R ~ A * C + B + (1 | X)
anova(m5, m7, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m5: R ~ A + C + (1 | X)
## m7: R ~ A + B + C + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)
## m5    5 -51.831 -39.168 30.916  -61.831                     
## m7    6 -49.835 -34.640 30.918  -61.835 0.0044  1      0.947
anova(m5, m9, test="Chisq")
## Data: data
## Models:
## m5: R ~ A + C + (1 | X)
## m9: R ~ A * C + (1 | X)
##    npar     AIC     BIC logLik deviance  Chisq Df Pr(>Chisq)
## m5    5 -51.831 -39.168 30.915  -61.831                     
## m9    6 -51.586 -36.391 31.793  -63.586 1.7553  1     0.1852

Egg case does not matter.

Plots

plot(d$mass_diff, d$speed_diff, 
     xlab="Changes in Mass between T1 and T2 (g)",
     ylab="Changes in Speed between T1 and T2 (m/s)")