Binomial Modeling: flight_yes

Testing glmer() and Covariates

Without Mass

With Mass

Morphology

Morphology + Mass + Sex

Cleaning the Data

rm(list=ls())
output_col = FALSE # Recommend changing this to TRUE if working in Base R or RStudio, and FALSE if generating an html
source("src/clean_flight_data.R") # Script that loads and cleans up the data

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

source("src/regression_output.R") # A script that cleans up regression outputs and prints in color or black and white
source("src/center_flight_data.R")
source("get_warnings.R")

data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]

d <- data_tested %>%
   group_by(ID, sex,population, site, host_plant, latitude, longitude, total_eggs, 
            beak, thorax, wing, body, w_morph, morph_notes, tested,
            host_c, sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, 
            beak_c, thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c, wing2body_s) %>%
   summarise_all(funs(list(na.omit(.))))

## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.

d$num_flew <- 0
d$num_notflew <- 0
d$average_mass <- 0

for(row in 1:length(d$flew_b)){
  
  n_flew <- sum(d$flew_b[[row]] == 1) # total number of times did fly among trails
  d$num_flew[[row]] <- n_flew 
  
  n_notflew <- sum(d$flew_b[[row]] == 0) # total number of times did not fly among trails
  d$num_notflew[[row]] <- n_notflew
  
  avg_mass <- mean(d$mass[[row]])
  d$average_mass[[row]] <- avg_mass

}

d <- select(d, -filename, -channel_letter, -set_number)
d <- center_data(d, is_not_binded = FALSE)
d

## # A tibble: 333 x 63
## # Groups:   ID, sex, population, site, host_plant, latitude, longitude,
## #   total_eggs, beak, thorax, wing, body, w_morph, morph_notes, tested, host_c,
## #   sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, beak_c,
## #   thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c [333]
##    ID    sex   population site  host_plant latitude longitude total_eggs  beak
##    <fct> <fct> <fct>      <fct> <fct>         <dbl>     <dbl>      <dbl> <dbl>
##  1 1     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  5.5 
##  2 2     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  5.64
##  3 3     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  5.75
##  4 4     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  6.21
##  5 5     F     Plantatio… Areg… C. corind…     25.0     -80.6        209  7.47
##  6 6     M     Plantatio… Foun… C. corind…     25.0     -80.6         NA  5.76
##  7 7     F     Plantatio… Foun… C. corind…     25.0     -80.6          8  8.19
##  8 8     F     Plantatio… Foun… C. corind…     25.0     -80.6         92  8.39
##  9 9     M     Plantatio… Foun… C. corind…     25.0     -80.6         NA  6.1 
## 10 10    M     Plantatio… Foun… C. corind…     25.0     -80.6         NA  6.09
## # … with 323 more rows, and 54 more variables: thorax <dbl>, wing <dbl>,
## #   body <dbl>, w_morph <fct>, morph_notes <fct>, tested <fct>, host_c <dbl>,
## #   sex_c <dbl>, w_morph_c <dbl>, lat_c <dbl>, sym_dist <dbl>,
## #   sym_dist_s <dbl>, total_eggs_c <dbl>, beak_c <dbl>, thorax_c <dbl>,
## #   thorax_s <dbl>, body_c <dbl>, wing_c <dbl>, wing2body <dbl>,
## #   wing2body_c <dbl>, wing2body_s <dbl>, trial_type <list>, chamber <list>,
## #   average_speed <list>, total_flight_time <list>, distance <list>,
## #   shortest_flying_bout <list>, longest_flying_bout <list>,
## #   total_duration <list>, max_speed <list>, test_date <list>,
## #   time_start <list>, time_end <list>, flew <list>, flight_type <list>,
## #   mass <list>, EWM <list>, flew_b <list>, eggs_b <list>,
## #   minute_duration <list>, minute_duration_c <list>, min_from_IncStart <dbl>,
## #   min_from_IncStart_c <dbl>, min_from_IncStart_s <dbl>,
## #   days_from_start <list>, days_from_start_c <list>, mass_c <list>,
## #   mass_s <list>, average_mass <dbl>, average_mass_c <dbl>,
## #   average_mass_s <dbl>, trial_type_og <list>, num_flew <dbl>,
## #   num_notflew <dbl>

Note

Modeling the dataset as it was before led to lots of converging issues that couldn’t be easily resolved. Now we are using a modified dataset, d, to model the data.

Testing Models and Covariates

# test mixed effect model
model <- glmer(cbind(num_flew, num_notflew)~sex_c*host_c + (1|population), data=d, family=binomial)

## boundary (singular) fit: see ?isSingular

tidy_regression(model, is_color=output_col) ####MLC: updated model, but won't converge with population as a RF

## glmer cbind(num_flew, num_notflew) ~ sex_c * host_c + (1 | population) d binomial 
## AIC:  690.6034 690.6034 
## (Intercept)      coeff:  -0.0048142  Pr(>|t|):  0.9617955
## sex_c            coeff:  -0.6587864  Pr(>|t|):  5.571849e-11 *
## host_c           coeff:  -0.0673863  Pr(>|t|):  0.5025515
## sex_c:host_c     coeff:  0.1645915   Pr(>|t|):  0.1014941

model <- glmer(cbind(num_flew, num_notflew)~sex_c*host_c + (1|site), data=d, family=binomial)
tidy_regression(model, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ sex_c * host_c + (1 | site) d binomial 
## AIC:  686.0047 686.0047 
## (Intercept)      coeff:  -0.0092744  Pr(>|t|):  0.9468471
## sex_c            coeff:  -0.703499   Pr(>|t|):  4.425505e-11 *
## host_c           coeff:  -0.0460686  Pr(>|t|):  0.7408722
## sex_c:host_c     coeff:  0.1855617   Pr(>|t|):  0.07700007 .

getME(model, "lower")

## [1] 0

## AB: What is a random factor here exactly? B/c it seems to matter which we use.
## AB: I tried this and it didn't work but is there a way to cbind() random factors so I can put in chamber, test date, or test time?

Experimental, Biological, and Morphological Effects

## glm cbind(num_flew, num_notflew) ~ average_mass_c binomial d 
## AIC:  690.0321 
## (Intercept)      coeff:  0.2314802   Pr(>|t|):  0.007290253 *
## average_mass_c   coeff:  -31.1658278 Pr(>|t|):  3.565029e-14 *

## glm cbind(num_flew, num_notflew) ~ total_eggs binomial d 
## AIC:  184.4609 
## (Intercept)  coeff:  0.0580002   Pr(>|t|):  0.8111338
## total_eggs   coeff:  -0.0117451  Pr(>|t|):  1.730675e-05 *

## glm cbind(num_flew, num_notflew) ~ beak_c binomial d 
## AIC:  734.2146 
## (Intercept)  coeff:  0.23902 Pr(>|t|):  0.003973372 *
## beak_c       coeff:  -0.4085962  Pr(>|t|):  9.565269e-07 *

## glm cbind(num_flew, num_notflew) ~ thorax_c binomial d 
## AIC:  739.1983 
## (Intercept)  coeff:  0.2385862   Pr(>|t|):  0.003890438 *
## thorax_c     coeff:  -1.2298779  Pr(>|t|):  1.111691e-05 *

## glm cbind(num_flew, num_notflew) ~ body_c binomial d 
## AIC:  750.4139 
## (Intercept)  coeff:  0.2389879   Pr(>|t|):  0.003520911 *
## body_c       coeff:  -0.2303836  Pr(>|t|):  0.003002383 *

## glm cbind(num_flew, num_notflew) ~ wing_c binomial d 
## AIC:  757.3505 
## (Intercept)  coeff:  0.2364567   Pr(>|t|):  0.003681042 *
## wing_c       coeff:  -0.1417783  Pr(>|t|):  0.1546876

Without Mass

R1 = d$num_flew
R2 = d$num_notflew
A = d$host_c
B = d$sex_c
C = d$sym_dist
D = d$average_mass_c 
X = d$population # or population get the same top model. Variance is 0 if use pop, not 0 if use site. However, variance is 0 if use site and average_mass (down below). 

data<-data.frame(R1, R2, A, B, C, D, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))

##        [,1]      [,2]      [,3]      [,4]      [,5]       [,6]      [,7]      
## AICs   690.4785  690.6034  691.2458  692.0327  692.4669   693.2453  693.3787  
## models 2         8         4         6         11         7         15        
## probs  0.2196987 0.2063894 0.1496899 0.1010033 0.08129227 0.0550839 0.05152821
## 
## m2   cbind(R1, R2) ~ B + (1 | X)
## m8   cbind(R1, R2) ~ A * B + (1 | X)
## m4   cbind(R1, R2) ~ A + B + (1 | X)
## m6   cbind(R1, R2) ~ B + C + (1 | X)
## m11  cbind(R1, R2) ~ A * B + C + (1 | X)
## m7   cbind(R1, R2) ~ A + B + C + (1 | X)
## m15  cbind(R1, R2) ~ A * B + B * C + (1 | X)

length(errors$warnings)

## [1] 0

anova(m4, m8, test="Chisq") # Adding A*B does not improve fit if use pop as RF | marginally improves fit if use site as RF

## Data: data
## Models:
## m4: cbind(R1, R2) ~ A + B + (1 | X)
## m8: cbind(R1, R2) ~ A * B + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m4  4 691.25 706.48 -341.62   683.25                         
## m8  5 690.60 709.64 -340.30   680.60 2.6424      1      0.104

anova(m2, m4, test="Chisq") # Adding A does not improve fit

## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m4: cbind(R1, R2) ~ A + B + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m2  3 690.48 701.90 -342.24   684.48                         
## m4  4 691.25 706.48 -341.62   683.25 1.2326      1     0.2669

anova(m2, m6, test="Chisq") # Adding C does not improve fit

## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m6: cbind(R1, R2) ~ B + C + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m2  3 690.48 701.90 -342.24   684.48                         
## m6  4 692.03 707.27 -342.02   684.03 0.4458      1     0.5043

nomass_model_all <- glmer(cbind(num_flew, num_notflew) ~ sex_c + (1|population), data=d, family=binomial)
tidy_regression(nomass_model_all, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ sex_c + (1 | population) d binomial 
## AIC:  690.4785 690.4785 
## (Intercept)  coeff:  -0.0004595  Pr(>|t|):  0.9969752
## sex_c        coeff:  -0.7362334  Pr(>|t|):  5.062086e-16 *

nomass_model_all <- glmer(cbind(num_flew, num_notflew) ~ sex_c + (1|site), data=d, family=binomial)
tidy_regression(nomass_model_all, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ sex_c + (1 | site) d binomial 
## AIC:  685.6479 685.6479 
## (Intercept)  coeff:  0.006371    Pr(>|t|):  0.9602811
## sex_c        coeff:  -0.7878123  Pr(>|t|):  6.065057e-16 *

strong negative effect of sex where if F then less number of times will fly

With Mass

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1-RF + 4-FF.R"))

##        [,1]      [,2]      
## AICs   682.1004  682.5135  
## models 63        26        
## probs  0.1016377 0.08266881
## 
## m63  cbind(R1, R2) ~ A * D + C * D + B + (1 | X)
## m26  cbind(R1, R2) ~ A * D + B + (1 | X)

length(errors$warnings)

## [1] 0

anova(m26, m36, test="Chisq") # Adding C does not improve fit

## Data: data
## Models:
## m26: cbind(R1, R2) ~ A * D + B + (1 | X)
## m36: cbind(R1, R2) ~ A * D + B + C + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m26  6 682.51 705.36 -335.26   670.51                         
## m36  7 684.32 710.98 -335.16   670.32 0.1954      1     0.6585

anova(m12, m26, test="Chisq") # Adding A*D does improve fit

## Data: data
## Models:
## m12: cbind(R1, R2) ~ A + B + D + (1 | X)
## m26: cbind(R1, R2) ~ A * D + B + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m12  5 685.67 704.72 -337.84   675.67                           
## m26  6 682.51 705.36 -335.26   670.51 5.1609      1     0.0231 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Wanted to check whether sym_dist is making much of an impact overall since m26 was a better fit and you can see it’s far from significant.

tidy_regression(glmer(cbind(num_flew, num_notflew) ~ sym_dist + (1|population), data=d, family=binomial), is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ sym_dist + (1 | population) d binomial 
## AIC:  760.9329 760.9329 
## (Intercept)  coeff:  0.1891344   Pr(>|t|):  0.1982647
## sym_dist     coeff:  0.0156086   Pr(>|t|):  0.8614592

mass_model_all <- glmer(cbind(num_flew, num_notflew) ~ host_c * average_mass_c + sex_c + (1|population), data=d, family=binomial)

## boundary (singular) fit: see ?isSingular

tidy_regression(mass_model_all, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ host_c * average_mass_c + sex_c + (1 | population) d binomial 
## AIC:  682.5135 682.5135 
## (Intercept)              coeff:  0.0755669   Pr(>|t|):  0.4658345
## host_c                   coeff:  -0.1293181  Pr(>|t|):  0.1868277
## average_mass_c           coeff:  -11.585146  Pr(>|t|):  0.1079791
## sex_c                    coeff:  -0.4151303  Pr(>|t|):  0.007602201 *
## host_c:average_mass_c    coeff:  10.4421094  Pr(>|t|):  0.02068315 *

Now there’s a conflicting interaction between average mass and host*average_mass…and there’s

no effect of host
no effect of average_mass_c

How do you graph these models? How would you graph cbind(num_flew, num_notflew)?

plot( cbind(num_flew, num_notflew) ~ cbind(host_c, average_mass), data=d , col=rangi2)

How does num_flew or num_notflew vary with host plant?

coplot(num_flew ~ average_mass_c | host_c, data=d)

coplot(num_notflew~ average_mass_c | host_c, data=d)

Plot the interaction between host*average_mass:

Consistent with the model and graphs: If GRT and had an average mass below the mean of the population, then you flew less times. But if were GRT and had an average mass above the mean of the population, then you flew more times.

Overall mass vs. num_flew trend:

gf_point(num_flew ~ average_mass, data=d) + 
  geom_smooth(method='lm')

## `geom_smooth()` using formula 'y ~ x'

Then break it between host and average mass:

p1 <- gf_point(num_flew ~ average_mass, col=~host_plant, data=d[d$average_mass_c < 0,], title="(a)   avgmass_c <0") + 
  geom_smooth(method='lm')
p2 <- gf_point(num_flew ~ average_mass, col=~host_plant, data=d[d$average_mass_c > 0,], title="(b)   avgmass_c >0") + 
  geom_smooth(method='lm')
grid.arrange(p1,p2, nrow=2)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

p3 <- gf_point(num_notflew ~ average_mass, col=~host_plant, data=d[d$average_mass_c < 0,], title="(a)   avgmass_c <0") + 
  geom_smooth(method='lm')
p4 <- gf_point(num_notflew ~ average_mass, col=~host_plant, data=d[d$average_mass_c > 0,], title="(b)   avgmass_c >0") + 
  geom_smooth(method='lm')
grid.arrange(p3,p4, nrow=2)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

GRT flyers: Even when they weighed more than the average mass of the population, they flew more times, which breaks the overall trend between number times bug flew vs. average mass which is typically negative.

Morphology

d <- d %>%
  filter(!is.na(body))
d$thorax_c <- d$thorax - mean(d$thorax)
d$wing_c <- d$wing - mean(d$wing)
d$body_c <- d$body - mean(d$body)

R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$body_c
C = d$wing_c 
X = d$population

data<-data.frame(R1, R2, A, B, C, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))

##        [,1]      [,2]      [,3]     [,4]      
## AICs   696.4932  697.7959  698.4702 699.7618  
## models 15        16        17       13        
## probs  0.4471192 0.2331006 0.166388 0.08722792
## 
## m15  cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m16  cbind(R1, R2) ~ A * C + B * C + (1 | X)
## m17  cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## m13  cbind(R1, R2) ~ B * C + A + (1 | X)

length(errors$warnings)

## [1] 1

anova(m15, m17, test="Chisq") # Adding A*C does not improve fit

## Data: data
## Models:
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m17: cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
##     Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)
## m15  7 696.49 723.15 -341.25   682.49                        
## m17  8 698.47 728.94 -341.24   682.47 0.023      1     0.8795

anova(m13, m15, test="Chisq") # Adding A*B does improve fit

## Data: data
## Models:
## m13: cbind(R1, R2) ~ B * C + A + (1 | X)
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m13  6 699.76 722.61 -343.88   687.76                           
## m15  7 696.49 723.15 -341.25   682.49 5.2686      1    0.02171 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

morph_model_all <- glmer(cbind(num_flew, num_notflew) ~ thorax_c * body_c + body_c * wing_c + (1|population), data=d, family=binomial)
tidy_regression(morph_model_all, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ thorax_c * body_c + body_c * wing_c + (1 | population) d binomial 
## AIC:  696.4932 696.4932 
## (Intercept)      coeff:  0.2804294   Pr(>|t|):  0.07044802 .
## thorax_c         coeff:  -2.549434   Pr(>|t|):  0.003596569 *
## body_c           coeff:  -1.2548354  Pr(>|t|):  0.007228665 *
## wing_c           coeff:  2.2612361   Pr(>|t|):  6.326959e-06 *
## thorax_c:body_c  coeff:  1.3630407   Pr(>|t|):  0.02490653 *
## body_c:wing_c    coeff:  -0.6384384  Pr(>|t|):  0.003529522 *

strong negative effect of thorax where the wider the thorax, the less times the bug flies
strong negative effect of body where the longer the body, the less times a bug flies
strong positive effect of wing where the longer the wing, the more times the bug flies
strong positive effect of thorax*body where the longer the body and wider the thorax, the more times a bug flies (hm seems conflicting to the single effects)
negative effect of of body*wing where the longer the body and wing, the less times the bug flies

R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
X = d$population

data<-data.frame(R1, R2, A, B, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1-RF + 2-FF.R"))

##        [,1]      [,2]     
## AICs   699.1221  700.8564 
## models 4         3        
## probs  0.7041422 0.2958542
## 
## m4   cbind(R1, R2) ~ A * B + (1 | X)
## m3   cbind(R1, R2) ~ A + B + (1 | X)

length(errors$warnings)

## [1] 0

anova(m3, m4, test="Chisq") # Adding A*B improves fit

## Data: data
## Models:
## m3: cbind(R1, R2) ~ A + B + (1 | X)
## m4: cbind(R1, R2) ~ A * B + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m3  4 700.86 716.09 -346.43   692.86                           
## m4  5 699.12 718.16 -344.56   689.12 3.7342      1    0.05331 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

morph_model_all2 <- glmer(cbind(num_flew, num_notflew) ~ wing2body_c * thorax_c + (1|population), data=d, family=binomial)
tidy_regression(morph_model_all2, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing2body_c * thorax_c + (1 | population) d binomial 
## AIC:  699.1221 699.1221 
## (Intercept)              coeff:  0.1644896   Pr(>|t|):  0.2796688
## wing2body_c              coeff:  30.4358638  Pr(>|t|):  3.405945e-08 *
## thorax_c                 coeff:  -1.4513019  Pr(>|t|):  8.423364e-07 *
## wing2body_c:thorax_c     coeff:  -37.3187734 Pr(>|t|):  0.05638751 .

strong positive effect of wing2body where the greater the ratio the more times a bug flies
negative effect of thorax where the the wider the thorax the less times a bug flies
marginal strong negative effect of wing2body*thorax where the wider the thorax and large the ratio, the less times a bug flies

# check for collinearity
covariates <- data.frame(d$thorax, d$wing2body, d$average_mass)
pairs(covariates)

Exponential relationships between mass and thorax. Thorax can only get so big - mass can increase but thorax caps out (top right).

covariates <- data.frame(d$thorax, d$wing, d$body, d$average_mass)
pairs(covariates)

Seems like once morphology caps out so does mass not stretch too far.

Morphology + Mass + Sex

R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
C = d$average_mass_c
X = d$population

data<-data.frame(R1, R2, A, B, C, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))

##        [,1]      [,2]     [,3]      
## AICs   660.5244  662.4846 665.1787  
## models 15        17       11        
## probs  0.6458782 0.242385 0.06302092
## 
## m15  cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m17  cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## m11  cbind(R1, R2) ~ A * B + C + (1 | X)

length(errors$warnings) # 13 models failed to converge

## [1] 13

anova(m11, m15, test="Chisq") # Adding B*C improves fit

## Data: data
## Models:
## m11: cbind(R1, R2) ~ A * B + C + (1 | X)
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)   
## m11  6 665.18 688.03 -326.59   653.18                            
## m15  7 660.52 687.18 -323.26   646.52 6.6543      1   0.009892 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m15, m17, test="Chisq") # Adding A*C does not improve fit

## Data: data
## Models:
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m17: cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m15  7 660.52 687.18 -323.26   646.52                         
## m17  8 662.48 692.95 -323.24   646.48 0.0398      1     0.8418

multi_model_all <- glmer(cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + wing2body_c * average_mass + (1|population), data=d, family=binomial) 
tidy_regression(multi_model_all, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + wing2body_c * average_mass + (1 | population) d binomial 
## AIC:  660.5244 660.5244 
## (Intercept)                  coeff:  2.0903463   Pr(>|t|):  2.37968e-07 *
## thorax_c                     coeff:  0.742338    Pr(>|t|):  0.1527069
## wing2body_c                  coeff:  -39.7258582 Pr(>|t|):  9.264061e-07 *
## average_mass                 coeff:  -36.1155189 Pr(>|t|):  7.888075e-07 *
## thorax_c:wing2body_c         coeff:  -108.5821295    Pr(>|t|):  2.376879e-07 *
## wing2body_c:average_mass     coeff:  1062.7542387    Pr(>|t|):  7.274993e-27 *

coefficients are huge. Probably a collinearity issue between morph measurements * mass.

R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
C = d$sex_c
X = d$population

data<-data.frame(R1, R2, A, B, C, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))

##        [,1]      [,2]      [,3]     [,4]     [,5]       [,6]       [,7]      
## AICs   677.5983  678.1801  678.4836 679.3227 680.0466   680.1788   680.4729  
## models 11        6         15       14       10         7          17        
## probs  0.2571693 0.1922604 0.16519  0.108588 0.07561179 0.07077347 0.06109487
## 
## m11  cbind(R1, R2) ~ A * B + C + (1 | X)
## m6   cbind(R1, R2) ~ B + C + (1 | X)
## m15  cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m14  cbind(R1, R2) ~ A * B + A * C + (1 | X)
## m10  cbind(R1, R2) ~ B * C + (1 | X)
## m7   cbind(R1, R2) ~ A + B + C + (1 | X)
## m17  cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)

length(errors$warnings) # no models failed

## [1] 0

anova(m11, m15, test="Chisq") # Adding B*C does not improve fit

## Data: data
## Models:
## m11: cbind(R1, R2) ~ A * B + C + (1 | X)
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m11  6 677.60 700.45 -332.80   665.60                         
## m15  7 678.48 705.14 -332.24   664.48 1.1147      1     0.2911

anova(m11, m14, test="Chisq") # Adding A*C does not improve fit

## Data: data
## Models:
## m11: cbind(R1, R2) ~ A * B + C + (1 | X)
## m14: cbind(R1, R2) ~ A * B + A * C + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m11  6 677.60 700.45 -332.80   665.60                         
## m14  7 679.32 705.98 -332.66   665.32 0.2757      1     0.5996

anova(m7, m11, test="Chisq") # Adding A*B does improve fit

## Data: data
## Models:
## m7: cbind(R1, R2) ~ A + B + C + (1 | X)
## m11: cbind(R1, R2) ~ A * B + C + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m7   5 680.18 699.22 -335.09   670.18                           
## m11  6 677.60 700.45 -332.80   665.60 4.5805      1    0.03234 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

multi_model_all2 <- glmer(cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + sex_c + (1|population), data=d, family=binomial) 
tidy_regression(multi_model_all2, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + sex_c + (1 | population) d binomial 
## AIC:  677.5983 677.5983 
## (Intercept)              coeff:  -0.0027673  Pr(>|t|):  0.9846484
## thorax_c                 coeff:  -0.0093963  Pr(>|t|):  0.981983
## wing2body_c              coeff:  17.3564108  Pr(>|t|):  0.004548248 *
## sex_c                    coeff:  -0.6549209  Pr(>|t|):  1.908829e-06 *
## thorax_c:wing2body_c     coeff:  -40.2152503 Pr(>|t|):  0.03633867 *

no effect of thorax
strong positve effect of ratio where the larger the ratio the more times a bug flew
negative effect of sex where if F then less times bug will fly
strong negative effect of thorax*ratio where the larger the ratio and wider the thorax, the less times a bug flew.

# weak positive relationship between thorax vs. wing2body 
p1 <- gf_point(thorax ~ wing2body, col=~num_flew, data=d) + 
  geom_smooth(method='lm')
p2 <- gf_point(thorax ~ wing2body, data=d[d$num_flew == 2,], title="num_flew=2") + 
  geom_smooth(method='lm')
p3 <- gf_point(thorax ~ wing2body, data=d[d$num_flew == 1,], title="num_flew=1") + 
  geom_smooth(method='lm')
p4 <- gf_point(thorax ~ wing2body, data=d[d$num_flew == 0,], title="num_flew=0") + 
  geom_smooth(method='lm')

grid.arrange(p1,p2,p3,p4, ncol=2)

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

As wing2body and thorax increases, num_flew decreases too, but its really the thorax shrinking that seems to be driving this.

gf_point(num_flew ~ thorax, data=d)  + 
  geom_smooth(method='lm')

## `geom_smooth()` using formula 'y ~ x'

gf_point(num_flew ~ wing2body, data=d)  + 
  geom_smooth(method='lm')

## `geom_smooth()` using formula 'y ~ x'

R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
C = d$sex_c
D = d$average_mass
X = d$population

data<-data.frame(R1, R2, A, B, C, D, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1-RF + 4-FF.R"))

##        [,1]      [,2]      [,3]     [,4]       [,5]       [,6]       [,7]      
## AICs   658.3051  658.3448  659.4618 659.4827   659.5901   660.038    660.3253  
## models 106       74        58       112        94         100        97        
## probs  0.1493435 0.1464084 0.083755 0.08288477 0.07854843 0.06278998 0.05438723
## 
## m106     cbind(R1, R2) ~ A * B + A * C + A * D + B * C + B * D + (1 | 
##     X)
## m74  cbind(R1, R2) ~ A * B + B * C + B * D + (1 | X)
## m58  cbind(R1, R2) ~ A * B + B * D + C + (1 | X)
## m112     cbind(R1, R2) ~ A * B + A * C + A * D + B * C + B * D + C * D + 
##     (1 | X)
## m94  cbind(R1, R2) ~ A * B + A * C + B * C + B * D + (1 | X)
## m100     cbind(R1, R2) ~ A * B + B * C + B * D + C * D + (1 | X)
## m97  cbind(R1, R2) ~ A * B + A * D + B * C + B * D + (1 | X)

length(errors$warnings) # 68 models failed + omodels are WAY too complicated. All the top models have multiple interactions b/c I'm guessing, they're correlated in some ways.

## [1] 61

anova(m94, m106, test="Chisq") # Adding A*D marginally improves fit

## Data: data
## Models:
## m94: cbind(R1, R2) ~ A * B + A * C + B * C + B * D + (1 | X)
## m106: cbind(R1, R2) ~ A * B + A * C + A * D + B * C + B * D + (1 | 
## m106:     X)
##      Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m94  10 659.59 697.67 -319.80   639.59                           
## m106 11 658.31 700.19 -318.15   636.31 3.2851      1    0.06991 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m97, m106, test="Chisq") # Adding A*C improves fit

## Data: data
## Models:
## m97: cbind(R1, R2) ~ A * B + A * D + B * C + B * D + (1 | X)
## m106: cbind(R1, R2) ~ A * B + A * C + A * D + B * C + B * D + (1 | 
## m106:     X)
##      Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m97  10 660.33 698.41 -320.16   640.33                           
## m106 11 658.31 700.19 -318.15   636.31 4.0202      1    0.04496 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#multi_model <- glmer(cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + thorax_c * sex_c + thorax_c * average_mass + wing2body_c * sex_c + wing2body_c * average_mass  + (1|population), data=d, family=binomial) # model failed to converge 
#tidy_regression(multi_model, is_color=output_col)

Summary

tidy_regression(nomass_model_all, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ sex_c + (1 | site) d binomial 
## AIC:  685.6479 685.6479 
## (Intercept)  coeff:  0.006371    Pr(>|t|):  0.9602811
## sex_c        coeff:  -0.7878123  Pr(>|t|):  6.065057e-16 *

tidy_regression(mass_model_all, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ host_c * average_mass_c + sex_c + (1 | population) d binomial 
## AIC:  682.5135 682.5135 
## (Intercept)              coeff:  0.0755669   Pr(>|t|):  0.4658345
## host_c                   coeff:  -0.1293181  Pr(>|t|):  0.1868277
## average_mass_c           coeff:  -11.585146  Pr(>|t|):  0.1079791
## sex_c                    coeff:  -0.4151303  Pr(>|t|):  0.007602201 *
## host_c:average_mass_c    coeff:  10.4421094  Pr(>|t|):  0.02068315 *

Why this host*average_mass interaction? GRT flyers: Even when they weighed more than the average mass of the population, they flew more times, which breaks the overall trend between number times bug flew vs. average mass which is typically negative. (Refer to plots in the “All Trials” tab)

tidy_regression(morph_model_all, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ thorax_c * body_c + body_c * wing_c + (1 | population) d binomial 
## AIC:  696.4932 696.4932 
## (Intercept)      coeff:  0.2804294   Pr(>|t|):  0.07044802 .
## thorax_c         coeff:  -2.549434   Pr(>|t|):  0.003596569 *
## body_c           coeff:  -1.2548354  Pr(>|t|):  0.007228665 *
## wing_c           coeff:  2.2612361   Pr(>|t|):  6.326959e-06 *
## thorax_c:body_c  coeff:  1.3630407   Pr(>|t|):  0.02490653 *
## body_c:wing_c    coeff:  -0.6384384  Pr(>|t|):  0.003529522 *

tidy_regression(morph_model_all2, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing2body_c * thorax_c + (1 | population) d binomial 
## AIC:  699.1221 699.1221 
## (Intercept)              coeff:  0.1644896   Pr(>|t|):  0.2796688
## wing2body_c              coeff:  30.4358638  Pr(>|t|):  3.405945e-08 *
## thorax_c                 coeff:  -1.4513019  Pr(>|t|):  8.423364e-07 *
## wing2body_c:thorax_c     coeff:  -37.3187734 Pr(>|t|):  0.05638751 .

Why this wing2body*thorax interaction? As wing2body and thorax increases, num_flew decreases too, but its really the thorax shrinking that seems to be driving this.

tidy_regression(multi_model_all, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + wing2body_c * average_mass + (1 | population) d binomial 
## AIC:  660.5244 660.5244 
## (Intercept)                  coeff:  2.0903463   Pr(>|t|):  2.37968e-07 *
## thorax_c                     coeff:  0.742338    Pr(>|t|):  0.1527069
## wing2body_c                  coeff:  -39.7258582 Pr(>|t|):  9.264061e-07 *
## average_mass                 coeff:  -36.1155189 Pr(>|t|):  7.888075e-07 *
## thorax_c:wing2body_c         coeff:  -108.5821295    Pr(>|t|):  2.376879e-07 *
## wing2body_c:average_mass     coeff:  1062.7542387    Pr(>|t|):  7.274993e-27 *

tidy_regression(multi_model_all2, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + sex_c + (1 | population) d binomial 
## AIC:  677.5983 677.5983 
## (Intercept)              coeff:  -0.0027673  Pr(>|t|):  0.9846484
## thorax_c                 coeff:  -0.0093963  Pr(>|t|):  0.981983
## wing2body_c              coeff:  17.3564108  Pr(>|t|):  0.004548248 *
## sex_c                    coeff:  -0.6549209  Pr(>|t|):  1.908829e-06 *
## thorax_c:wing2body_c     coeff:  -40.2152503 Pr(>|t|):  0.03633867 *

Huge coefficients! I’m sure this is all due to collinearity. Thoughts?

All Trials (Old Method) Summary

nomass_model_all_old<-glmer(flew_b~sex_c*host_c + (1|ID) + (1|trial_type), family=binomial, data=data_tested)
tidy_regression(nomass_model_all_old, is_color=output_col)

## glmer flew_b ~ sex_c * host_c + (1 | ID) + (1 | trial_type) data_tested binomial 
## AIC:  714.6409 714.6409 
## (Intercept)      coeff:  -0.2909258  Pr(>|t|):  0.5897965
## sex_c            coeff:  -1.8480135  Pr(>|t|):  0.0005216503 *
## host_c           coeff:  -0.1968491  Pr(>|t|):  0.4865478
## sex_c:host_c     coeff:  0.4843427   Pr(>|t|):  0.1109208

It’s possible that females from GRT are more likley to fly because they are colonizers.

# There isn't a mass_model_all_old because too many models failed to converge

data_tested <- data_tested %>%
  filter(!is.na(body))
data_tested <- center_data(data_tested)

morph_model_all_old<-glmer(flew_b~ thorax_c * body_c + body_c * wing_c + (1|ID), family=binomial, data=data_tested) 
tidy_regression(morph_model_all_old, is_color=output_col) # same morph model as the morph_model_all

## glmer flew_b ~ thorax_c * body_c + body_c * wing_c + (1 | ID) data_tested binomial 
## AIC:  739.881 739.881 
## (Intercept)      coeff:  0.657076    Pr(>|t|):  0.004401535 *
## thorax_c         coeff:  -4.203661   Pr(>|t|):  0.01688148 *
## body_c           coeff:  -2.2457843  Pr(>|t|):  0.02205122 *
## wing_c           coeff:  3.8423882   Pr(>|t|):  0.0003701166 *
## thorax_c:body_c  coeff:  2.2419667   Pr(>|t|):  0.05387419 .
## body_c:wing_c    coeff:  -1.1049738  Pr(>|t|):  0.009640642 *

morph_model_all_old2<-glmer(flew_b~ thorax_c + wing2body_c + (1|trial_type) + (1|ID), family=binomial, data=data_tested)
tidy_regression(morph_model_all_old2, is_color=output_col)

## glmer flew_b ~ thorax_c + wing2body_c + (1 | trial_type) + (1 | ID) data_tested binomial 
## AIC:  731.1757 731.1757 
## (Intercept)  coeff:  0.3960016   Pr(>|t|):  0.3805652
## thorax_c     coeff:  -3.3415059  Pr(>|t|):  0.0001888262 *
## wing2body_c  coeff:  69.1783151  Pr(>|t|):  1.838582e-05 *

This morph model is a better fit model than the morph_model_all_old model, but HUGE coefficient for wing2body_c. Why?

Delta Flight Response (T1 vs. T2)

Introduction

Clean the Data

Flew Diff ~ Mass Diff

Flew Diff ~ Egg Diff

Introduction

Some graphs I generated illustrated how changes in mass or egg-laying lead to changes in flight response, speed, and distance. Below, I run analyses on changes in flight response due to changes in mass or egg-laying that will help interpret and describe those graphs. Other graphs such as speed and distance changes will be found in the speed and distance scripts.

To perfom these analyses, a new variable will be created called “flew_diff” which calculates the flight response differences between T1 and T2. If flew_diff = -1, then the bug flew in T1 but not T2. If flew_diff = 0, then the bug either did not fly in either or flew in both T1 and T2 (no flight response). If flew_diff = 1, then the bug flew in T2 but not T1. Since this response variable is no longer binomial, I will need to use multicategorical logit models (refer to Chapter 6 of Introduction to Categorical Data Analysis by Alan Agresti). Below I’ve written out notes on performing these analyses.

What we are interested in is a multicategorical, nomial response variable. When the response variable is nominal, there is no natural order among the response variable categories (unordered categories). When the response variable is multicategorical, its multicategory models assume that the counts in the categories of \(Y\) have a multinomial distribution.

Let J = number of categories for \(Y\).

\(\pi_1,...\pi_J\) = the response probabilities where \(\sum_j \pi_j = 1\).

With \(n\) independent observations, the probability distribution for the number of outcomes of the J types is the multinomial. It specifices the probability for each possible way the \(n\) observations can fall in the J categories. Multicategory logit models simultaneously use all pairs of categories by specifying the odds of outocme in one category instead of another.

Logit models for nomial response variables pair each category with a baseline category. The choice of the baseline category is arbitrary. When the last category (J) is the baseline, the baseline-category logits are

\(\log (\frac{\pi_j}{\pi_J}), j = 1, ..., J - 1\).

Given that the response falls in category j or category J, this is the log odds that the response is j. For J = 3, for instance, the model uses \(\log(\pi_1/\pi_3)\) and \(\log(\pi_2/\pi_3)\). The baseline-category logit model with a predictor x is

\(\log \frac{\pi_j}{\pi_J} = \alpha_J + \beta_j x, j = 1, ..., J - 1\)

The model has J - 1 equations, with separate parameters for each. The effects vary according to the category paried with the baseline. When J = 2, this model simplifies to a single equaiton for \(\log(\pi_1/\pi_2) = logit(\pi_1)\), resulting in ordinary logistic regression for binary responses.

So how do these pair of categories determine equations for all other pairs of categories? Here is an arbitrary pair of categories a and b that follows the general equation above,

\(\log (\frac{\pi_a}{\pi_b}) = \log ( \frac{\pi_a/pi_J}{\pi_b/pi_J}) = \log (\frac{\pi_a}{\pi_J}) - \log (\frac{\pi_b}{\pi_J})\)

\(= (\alpha_a + \beta_a x) - (\alpha_b + \beta_b x)\)

\(= (\alpha_a - \alpha_b) + (\beta_a - \beta_b) x\)

So, the equaiton for categories a and b has the form \(\alpha + \beta x\) with intercept parameter \(\alpha = (\alpha_a - \alpha_b)\) and with slope parameter \(\beta = (\beta_a - \beta_b)\).

Let’s apply it to our data now:

Cleaning the Data

rm(list=ls())
output_col = FALSE # Recommend changing this to TRUE if working in Base R or RStudio, and FALSE if generating an html
source("src/clean_flight_data.R") # Script that loads and cleans up the data
source("src/regression_output.R") # A script that cleans up regression outputs and prints in color or black and white
source("src/center_flight_data.R")
source("get_warnings.R")

data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]

d <- data_tested %>%
   group_by(ID, sex,population, site, host_plant, latitude, longitude, total_eggs, 
            beak, thorax, wing, body, w_morph, morph_notes, tested,
            host_c, sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, 
            beak_c, thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c, wing2body_s) %>%
   summarise_all(funs(list(na.omit(.))))

d$num_flew <- 0
d$num_notflew <- 0
d$average_mass <- 0

d$mass_diff <- 0
d$flew_diff <- 0
d$dist_diff <- 0
d$speed_diff <- 0

for(row in 1:length(d$flew_b)){
  
  n_flew <- sum(d$flew_b[[row]] == 1) # total number of times did fly among trails
  d$num_flew[[row]] <- n_flew 
  
  n_notflew <- sum(d$flew_b[[row]] == 0) # total number of times did not fly among trails
  d$num_notflew[[row]] <- n_notflew
  
  avg_mass <- mean(d$mass[[row]])
  d$average_mass[[row]] <- avg_mass
  
  # mass, flight, distance, and speed changes between T1 and T2
  
  d$mass_diff[[row]] <- d$mass[[row]][2] - d$mass[[row]][1]  # T2 - T1
  d$flew_diff[[row]] <- d$flew_b[[row]][2] - d$flew_b[[row]][1]  # T2 - T1
  d$dist_diff[[row]] <- d$distance[[row]][2] - d$distance[[row]][1]  # T2 - T1
  d$speed_diff[[row]] <- d$average_speed[[row]][2] - d$average_speed[[row]][1]  # T2 - T1
  d$egg_diff[[row]] <- d$eggs_b[[row]][2] - d$eggs_b[[row]][1]  # T2 - T1

}

## Warning: Unknown or uninitialised column: `egg_diff`.

d <- select(d, -filename, -channel_letter, -set_number)

d # NA's generated are good because that means it's accounted only for bugs that flew in both trials

## # A tibble: 333 x 68
## # Groups:   ID, sex, population, site, host_plant, latitude, longitude,
## #   total_eggs, beak, thorax, wing, body, w_morph, morph_notes, tested, host_c,
## #   sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, beak_c,
## #   thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c [333]
##    ID    sex   population site  host_plant latitude longitude total_eggs  beak
##    <fct> <fct> <fct>      <fct> <fct>         <dbl>     <dbl>      <dbl> <dbl>
##  1 1     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  5.5 
##  2 2     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  5.64
##  3 3     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  5.75
##  4 4     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  6.21
##  5 5     F     Plantatio… Areg… C. corind…     25.0     -80.6        209  7.47
##  6 6     M     Plantatio… Foun… C. corind…     25.0     -80.6         NA  5.76
##  7 7     F     Plantatio… Foun… C. corind…     25.0     -80.6          8  8.19
##  8 8     F     Plantatio… Foun… C. corind…     25.0     -80.6         92  8.39
##  9 9     M     Plantatio… Foun… C. corind…     25.0     -80.6         NA  6.1 
## 10 10    M     Plantatio… Foun… C. corind…     25.0     -80.6         NA  6.09
## # … with 323 more rows, and 59 more variables: thorax <dbl>, wing <dbl>,
## #   body <dbl>, w_morph <fct>, morph_notes <fct>, tested <fct>, host_c <dbl>,
## #   sex_c <dbl>, w_morph_c <dbl>, lat_c <dbl>, sym_dist <dbl>,
## #   sym_dist_s <dbl>, total_eggs_c <dbl>, beak_c <dbl>, thorax_c <dbl>,
## #   thorax_s <dbl>, body_c <dbl>, wing_c <dbl>, wing2body <dbl>,
## #   wing2body_c <dbl>, wing2body_s <dbl>, trial_type <list>, chamber <list>,
## #   average_speed <list>, total_flight_time <list>, distance <list>,
## #   shortest_flying_bout <list>, longest_flying_bout <list>,
## #   total_duration <list>, max_speed <list>, test_date <list>,
## #   time_start <list>, time_end <list>, flew <list>, flight_type <list>,
## #   mass <list>, EWM <list>, flew_b <list>, eggs_b <list>,
## #   minute_duration <list>, minute_duration_c <list>, min_from_IncStart <list>,
## #   min_from_IncStart_c <list>, min_from_IncStart_s <list>,
## #   days_from_start <list>, days_from_start_c <list>, mass_c <list>,
## #   mass_s <list>, average_mass <dbl>, average_mass_c <list>,
## #   average_mass_s <list>, trial_type_og <list>, num_flew <dbl>,
## #   num_notflew <dbl>, mass_diff <dbl>, flew_diff <dbl>, dist_diff <dbl>,
## #   speed_diff <dbl>, egg_diff <dbl>

Below I used the multinom function from the nnet package to estimate a multinomial logistic regression model. There are other functions in other R packages capable of multinomial regression. I chose the multinom function because it does not require the data to be reshaped (as the mlogit package does) and to mirror the example code found in Hilbe’s Logistic Regression Models.

First, I need to choose the level of our outcome that we wish to use as our baseline and specify this in the relevel function. Then, I ran our model using multinom. The multinom package does not include p-value calculation for the regression coefficients, so I calculate p-values using Wald tests (here z-tests).

Flew Diff ~ Mass Diff

df <- d %>%
  filter(!is.na(mass_diff), !is.na(flew_diff)) 
  
df <- df[with(df, order(mass_diff)),]

df$flew_diff <- relevel(as.factor(df$flew_diff), ref = "0")

Null model:

null <- multinom(flew_diff ~ 1, data = df)

## # weights:  6 (2 variable)
## initial  value 305.414216 
## iter  10 value 174.928010
## iter  10 value 174.928009
## iter  10 value 174.928009
## final  value 174.928009 
## converged

Mass_diff model:

model <- multinom(flew_diff ~ mass_diff, data = df)

## # weights:  9 (4 variable)
## initial  value 305.414216 
## iter  10 value 166.730466
## final  value 166.154287 
## converged

s <- summary(model) # multinom doesn't compute pvalues so need to do it by hand.
z <- s$coefficients/s$standard.errors  #  calculate p-values using Wald tests (here z-tests)
wald <- z^2
p <- (1 - pnorm(abs(z), 0, 1)) * 2

table <- cbind(s$coefficients, c(s$edf, s$edf), s$standard.errors[,2], z[,2], wald[,2], p[,2])
colnames(table) <- c("(Intercept)"," Estimate","DF", "Std. Err.", "z", "wald", "P > |z|")
cat("\n")

cat("AIC: ", s$AIC, "\n")

## AIC:  340.3086

table

##    (Intercept)  Estimate DF Std. Err.          z        wald      P > |z|
## -1   -1.699907  52.84829  4  13.64735  3.8724203 14.99563911 0.0001077599
## 1    -3.066996  -4.57556  4  27.85556 -0.1642602  0.02698141 0.8695263283

anova(null, model, test="Chisq") # adding mass_diff improves fit

## Likelihood ratio tests of Multinomial Models
## 
## Response: flew_diff
##       Model Resid. df Resid. Dev   Test    Df LR stat.      Pr(Chi)
## 1         1       554   349.8560                                   
## 2 mass_diff       552   332.3086 1 vs 2     2 17.54744 0.0001547465

The ML prediction equations are:

\(\log(\hat{\pi_{-1}}/\hat{\pi_0}) = -1.70 + 50.85 x\) and

\(\log(\hat{\pi_1}/\hat{\pi_0}) = -3.07 - 4.58 x\)

The estimated log odds that the response is -1 (Flew in T1, not T2) rather than 1 (flew in T2, not T1) equals:

\(\log(\hat{\pi_{-1}}/\hat{\pi_1}) = (-1.70 - (-3.07)) + (50.85 - (- 4.58)) x = 1.37 + 55.43 x\) where x = mass_diff

So, the more positive the mass difference (positive (+) mass means gained mass from T1 to T2, and negative (-) mass means lost mass), the more likely to select flew_diff = -1, meaning that the bug flew in T1 but not T2. While the smaller/more negative the mass difference, the more likely to select flew_diff = 1, meaning that the bug flew in T2 but not T1.

(50.85 - (- 4.58))

## [1] 55.43

For bugs of mass_diff x + 1/50 (0.02) g, the estimated odds that flew_diff = -1 rather than flew_diff = 1 equals

times the estimated odds at mass_diff x (g).

exp(coef(model)/50)

##    (Intercept) mass_diff
## -1   0.9665733 2.8776264
## 1    0.9405035 0.9125511

The relative risk ratio for a 1/50-unit increase in the variable mass_diff is 2.90 for flew_diff = -1 (T1 flew only) rather than flew_diff = 1 (T2 flew only).

Predicted probabilities

You can also use predicted probabilities to help you understand the model. The multicategory logit model has an alternative expression in terms of the response probabilities. This is

\(\pi_j = \frac{e^{\alpha_j + \beta_j x}}{\sum_he^{\alpha_h + \beta_h x}}, j=1,...,J\)

The denominator is the same for each probability, and the numerators for various j sum to the denominator where \(\sum_j \pi_j = 1\). The parameters (\(\alpha and \beta\)) equal zero for whichever category is the baseline in the logit expressions. So these would be the equations for the model above:

\(\hat{\pi_-1} = \frac{e^{-1.70 + 50.85 x}}{1 + e^{-1.70 + 50.85x} + e^{-3.07 - 4.58 x}}, j=1,...,J\)

\(\hat{\pi_1} = \frac{e^{-3.07 - 4.58 x}}{1 + e^{-1.70 + 50.85x} + e^{-3.07 - 4.58 x}}, j=1,...,J\)

\(\hat{\pi_0} = \frac{e^{0 + 0 x} = 1}{1 + e^{-1.70 + 50.85x} + e^{-3.07 - 4.58x}}, j=1,...,J\)

x = mass_diff which can give you the estimated probabilities.

You can calculate these predicted probabilities for each of our outcome levels using the fitted function. You can start by generating the predicted probabilities for the observations in our dataset and viewing the first few rows. Then you can plot them.

head(pp <- fitted(model))

##           0         -1          1
## 1 0.9309721 0.01577063 0.05325723
## 2 0.9268548 0.02155929 0.05158587
## 3 0.9259895 0.02270809 0.05130243
## 4 0.9219192 0.02793021 0.05015061
## 5 0.9194762 0.03096174 0.04956208
## 6 0.9167257 0.03431055 0.04896370

plot(df$mass_diff, pp[,1], ylim=c(0,1), col="red", type="l", ylab="Predicted Probability", xlab="Changes in Mass From T1 to T2 (g)") # no flight_diff
points(df$mass_diff, pp[,2], col="blue", type="l") # flew in T1 but not T2
points(df$mass_diff, pp[,3], col="green", type="l") # flew in T2 but not T1
text(0.02,0.8, labels="No Flight Diff", col="red")
text(0.0,0.35, labels="Flew in T1 but not T2", col="blue")
text(0.02,0.1, labels="Flew in T2 but not T1", col="green")
abline(v=0.035, lty=2)
abline(v=-0.027, lty=2)

Summary: Those who gain mass (at least more than 0.035 g) are more likely to fly in T1 but not T2 and less likely to have no change in flight response. Those who loose mass are more likely to fly in T2 but not T1 (but need to loose significant mass - more than 0.03 (g)).

Now let’s compare some models:

data <- data.frame(R = df$flew_diff, 
         A = df$mass_diff, 
         B = df$host_c, 
         C = df$sex_c)
source("src/compare_models.R")
model_comparisonsAIC("src/generic multinomial models- multinom 1RF + 3 FF.R")

##        [,1]      [,2]      [,3]      [,4]       [,5]       [,6]      
## AICs   333.1306  333.6353  334.6408  335.332    335.5504   335.8177  
## models 5         11        13        16         7          14        
## probs  0.2715071 0.2109556 0.1275994 0.09031437 0.08097221 0.07083936
##        [,7]      
## AICs   335.9544  
## models 9         
## probs  0.06615921
## 
## m5   multinom(formula = R ~ A + C, data = data, trace = FALSE)
## m11  multinom(formula = R ~ A * B + C, data = data, trace = FALSE)
## m13  multinom(formula = R ~ B * C + A, data = data, trace = FALSE)
## m16  multinom(formula = R ~ B * C + A * B, data = data, trace = FALSE)
## m7   multinom(formula = R ~ A + B + C, data = data, trace = FALSE)
## m14  multinom(formula = R ~ A * B + A * C, data = data, trace = FALSE)
## m9   multinom(formula = R ~ A * C, data = data, trace = FALSE)

anova(m5, m7, test="Chisq") # Adding B does not improve fit

## Likelihood ratio tests of Multinomial Models
## 
## Response: R
##       Model Resid. df Resid. Dev   Test    Df LR stat.   Pr(Chi)
## 1     A + C       550   321.1306                                
## 2 A + B + C       548   319.5504 1 vs 2     2 1.580235 0.4537914

anova(m5, m9, test="Chisq") # Adding A*C does not improve fit

## Likelihood ratio tests of Multinomial Models
## 
## Response: R
##   Model Resid. df Resid. Dev   Test    Df LR stat.   Pr(Chi)
## 1 A + C       550   321.1306                                
## 2 A * C       548   319.9544 1 vs 2     2 1.176152 0.5553949

delta_mass_model <- multinom(flew_diff ~ mass_diff + sex_c, data = df)

## # weights:  12 (6 variable)
## initial  value 305.414216 
## iter  10 value 167.318267
## iter  20 value 161.019802
## iter  30 value 160.852946
## iter  40 value 160.741183
## iter  50 value 160.658113
## iter  60 value 160.603172
## iter  70 value 160.587839
## iter  80 value 160.572910
## iter  90 value 160.565452
## final  value 160.565295 
## converged

s <- summary(delta_mass_model) # multinom doesn't compute pvalues so need to do it by hand.
z <- s$coefficients/s$standard.errors  #  calculate p-values using Wald tests (here z-tests)
wald <- z^2
p <- (1 - pnorm(abs(z), 0, 1)) * 2

table <- cbind(s$coefficients, c(s$edf, s$edf), s$standard.errors[,2:3], z[,2:3], wald[,2:3], p[,2:3])
colnames(table) <- c("(Intercept)","coeff mass_diff", "coeff sex_c","DF", "MD SE", "sex SE", "MD z", "sex z", 
                     "MD wald","sex wald","MD P > |z|", "sex P > |z|")
cat("\n", "AIC: ", s$AIC, "\n", "MD = mass_diff", "\n")

## 
##  AIC:  333.1306 
##  MD = mass_diff

table

##    (Intercept) coeff mass_diff coeff sex_c DF    MD SE     sex SE       MD z
## -1   -1.863449        61.25330  -0.2881294  6 15.99454  0.1991477  3.8296384
## 1    -8.900205       -55.86612  -6.3472957  6 67.78557 89.6299371 -0.8241596
##          sex z   MD wald    sex wald   MD P > |z| sex P > |z|
## -1 -1.44681204 14.666130 2.093265087 0.0001283317   0.1479496
## 1  -0.07081669  0.679239 0.005015004 0.4098489056   0.9435436

The general ML prediction equaiton is:

\(\log(\hat{\pi_{j}}/\hat{\pi_0}) =\alpha_{j} + \beta_{-1}^{\delta mass} x_1 + \beta_{j}^{sex} x_2\) where j = -1 or 1

The individual ML prediction equations are:

\(\log(\hat{\pi_{-1}}/\hat{\pi_0}) = -1.86 + 61.25 (\delta mass) - 0.29 (sex)\) and

\(\log(\hat{\pi_1}/\hat{\pi_0}) = -8.90 - 55.87 (\delta mass) - 6.35 (sex)\)

The estimated log odds that the response is -1 (Flew in T1, not T2) rather than 1 (flew in T2, not T1) equals:

\(\log(\hat{\pi_{-1}}/\hat{\pi_1}) = (-1.86 - (-8.90)) + (61.25 - (- 55.87)) \delta mass + (-0.29 - (-6.35)) sex\)

\(\log(\hat{\pi_{-1}}/\hat{\pi_1}) = 7.04 + 117.12 \delta mass + 6.06 sex\)

For bugs of mass_diff x + 1/50 (0.02) g and female, the estimated odds that flew_diff = -1 rather than flew_diff = 1 equals

exp((117.12+6.06)/50)

## [1] 11.74702

times (almost 12 times) the estimated odds at mass_diff x (g).

For bugs of mass_diff x + 1/50 (0.02) g and male, the estimated odds that flew_diff = -1 rather than flew_diff = 1 equals

exp((117.12-6.06)/50)

## [1] 9.218386

times the estimated odds at mass_diff x (g).

head(pp <- fitted(delta_mass_model))

##           0          -1            1
## 1 0.9926638 0.007333222 2.928886e-06
## 2 0.9894420 0.010555888 2.087933e-06
## 3 0.9887828 0.011215207 1.973171e-06
## 4 0.9857139 0.014284503 1.573132e-06
## 5 0.9838824 0.016116154 1.404213e-06
## 6 0.9818204 0.018178338 1.253133e-06

plot(df$mass_diff, pp[,1], ylim=c(0,1), col="red", type="b", ylab="Predicted Probability", xlab="Changes in Mass From T1 to T2 (g)") # no flight_diff
points(df$mass_diff, pp[,2], col="blue", type="b") # flew in T1 but not T2
points(df$mass_diff, pp[,3], col="green", type="b") # flew in T2 but not T1
text(0.031,0.8, labels="No Flight Diff", col="red")
text(0.008,0.5, labels="Flew in T1 but not T2", col="blue")
text(0.052,0.1, labels="Flew in T2 but not T1", col="green")
# female line is always higher
text(0.0,0.96, labels="F", col="red")
text(0.0,0.7, labels="M", col="red")
text(0.0,0.25, labels="F", col="blue")
text(0.02,0.2, labels="M", col="blue")
text(-0.03,0.25, labels="F", col="green")
text(0.025,0.05, labels="M", col="green")
abline(v=0.03, lty=2, col="slategrey")
abline(v=0.035, lty=2, col="slategrey")
abline(v=0.04, lty=2, col="slategrey")
abline(v=-0.01, lty=2, col="slategrey")
abline(v=-0.045, lty=2, col="slategrey")

\(\log(\hat{\pi_{-1}}/\hat{\pi_1}) = 6.67 + 95.21 \delta mass + 5.45 sex\)

As females gain mass (need to gain more than males - 0.04 vs. 0.03 g) they are more likely to fly in T1 than T2. As females loose mass they are much more likely than males to fly in T2 than T1. But both are mor likely to have no mass flight response change.

More on multinomial plotting for when have more than 1 predictor variables: https://stats.idre.ucla.edu/r/dae/multinomial-logistic-regression/

Flew Diff ~ Egg Diff (females only)

df <- d %>%
  filter(!is.na(egg_diff), !is.na(flew_diff), sex_c == 1)
  
df$flew_diff <- relevel(as.factor(df$flew_diff), ref = "0")

Null model:

null <- multinom(flew_diff ~ 1, data = df)

## # weights:  2 (1 variable)
## initial  value 66.542129 
## final  value 46.327446 
## converged

Egg_diff model:

model <- multinom(flew_diff ~ egg_diff, data = df)

## # weights:  3 (2 variable)
## initial  value 66.542129 
## iter  10 value 40.203864
## iter  10 value 40.203864
## iter  10 value 40.203864
## final  value 40.203864 
## converged

s <- summary(model) # multinom doesn't compute pvalues so need to do it by hand.
z <- s$coefficients/s$standard.errors  #  calculate p-values using Wald tests (here z-tests)
wald <- z^2
p <- (1 - pnorm(abs(z), 0, 1)) * 2

table <- cbind(s$coefficients, c(s$edf, s$edf), s$standard.errors[2], z[2], wald[2], p[2])
colnames(table) <- c(" Estimate","DF", "Std. Err.", "z", "wald", "P > |z|")
cat("\n")

cat("AIC: ", s$AIC, "\n")

## AIC:  84.40773

table

##              Estimate DF Std. Err.        z     wald    P > |z|
## (Intercept) -2.219471  2 0.5486809 3.234434 10.46156 0.00121884
## egg_diff     1.774672  2 0.5486809 3.234434 10.46156 0.00121884

anova(null, model, test="Chisq") # adding egg_diff improves fit

## Likelihood ratio tests of Multinomial Models
## 
## Response: flew_diff
##      Model Resid. df Resid. Dev   Test    Df LR stat.      Pr(Chi)
## 1        1        -1   92.65489                                   
## 2 egg_diff        -2   80.40773 1 vs 2     1 12.24716 0.0004659658

The ML prediction equations are:

\(\log(\hat{\pi_{-1}}/\hat{\pi_0}) = -2.22 + 1.78 x\) and that’s it. Why? Because these comparisons have been reduced to a binomial situation. There are no flight responses = 1. None of the female bugs Fly in T2 but not T1. So we can do our regular binomial comparisons.

# stopped here
# Remove any missing masses
df <- df[!is.na(df$mass_diff),]

# Change the -1's to 1's, but still need to keep in mind 1 will then mean that all the bugs flew in T1 but not T2
df$flew_diff <- abs(as.integer(df$flew_diff)) - 1
df$flew_diff <- as.factor(df$flew_diff)

R = df$flew_diff
A = df$egg_diff
B = df$mass_diff
#C = df$sym_dist
C = df$host_c

data<-data.frame(R, A, B, C)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")

##        [,1]     [,2]      [,3]      [,4]       [,5]       [,6]       [,7]      
## AICs   73.28097 73.75454  74.48026  75.28029   75.42404   75.67354   75.73601  
## models 7        12        13        11         16         6          14        
## probs  0.210355 0.1660043 0.1154862 0.07741166 0.07204284 0.06359357 0.06163784
## 
## m7   glm(formula = R ~ A + B + C, family = binomial, data = data)
## m12  glm(formula = R ~ A * C + B, family = binomial, data = data)
## m13  glm(formula = R ~ B * C + A, family = binomial, data = data)
## m11  glm(formula = R ~ A * B + C, family = binomial, data = data)
## m16  glm(formula = R ~ A * C + B * C, family = binomial, data = data)
## m6   glm(formula = R ~ B + C, family = binomial, data = data)
## m14  glm(formula = R ~ A * B + A * C, family = binomial, data = data)

anova(m7, m12, test="Chisq") # Adding A*C does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ A + B + C
## Model 2: R ~ A * C + B
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1        89     65.281                     
## 2        88     63.755  1   1.5264   0.2166

anova(m6, m7, test="Chisq") # Addin A does improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ B + C
## Model 2: R ~ A + B + C
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1        90     69.674                       
## 2        89     65.281  1   4.3926   0.0361 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

delta_egg_model <- glm(flew_diff ~ egg_diff + mass_diff + host_c, family = binomial, data = df)
tidy_regression(delta_egg_model, is_color = output_col)

## glm flew_diff ~ egg_diff + mass_diff + host_c binomial df 
## AIC:  73.28097 
## (Intercept)  coeff:  -2.1794225  Pr(>|t|):  2.381079e-06 *
## egg_diff     coeff:  1.3447603   Pr(>|t|):  0.03846687 *
## mass_diff    coeff:  40.2176192  Pr(>|t|):  0.02047179 *
## host_c       coeff:  0.7246695   Pr(>|t|):  0.02731218 *

strong effect of egg_diff where the more eggs a female bug laid the more likely a female bug flew in T1 but not T2
strong effect of mass where higher the mass gain the more likely the female bug flew in T1 but not T2. The smaller the mass_diff or if lost mass then the more likely the female bug did not change its flight response.
positive effect of host where if from GRT then more likely to have flown in T1 but not T2

gf_point(flew_diff ~ mass_diff, data=df, col=~egg_diff)

Summary

delta_mass_model

## Call:
## multinom(formula = flew_diff ~ mass_diff + sex_c, data = df)
## 
## Coefficients:
##    (Intercept) mass_diff      sex_c
## -1   -1.863449  61.25330 -0.2881294
## 1    -8.900205 -55.86612 -6.3472957
## 
## Residual Deviance: 321.1306 
## AIC: 333.1306

Look within the ‘Delta flight response’ tab to see the making of the graphs generated from this model. Here is the graph:

tidy_regression(delta_egg_model, is_color = output_col)

## glm flew_diff ~ egg_diff + mass_diff + host_c binomial df 
## AIC:  73.28097 
## (Intercept)  coeff:  -2.1794225  Pr(>|t|):  2.381079e-06 *
## egg_diff     coeff:  1.3447603   Pr(>|t|):  0.03846687 *
## mass_diff    coeff:  40.2176192  Pr(>|t|):  0.02047179 *
## host_c       coeff:  0.7246695   Pr(>|t|):  0.02731218 *

Trial 1

Testing glmer() with pop and chamber

Cleaning the Data

rm(list=ls())
output_col = FALSE 
source("src/clean_flight_data.R") 
source("src/regression_output.R") 
source("src/center_flight_data.R")
source("get_warnings.R")

data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]

data_T1 <-data_tested[data_tested$trial_type=="T1",]
data_T1 <- center_data(data_T1)

Experimental Set-Up Effects

## glm flew_b ~ chamber binomial data_T1 
## AIC:  458.9055 
## (Intercept)  coeff:  0.2876821   Pr(>|t|):  0.5942526
## chamberA-2   coeff:  -0.0966268  Pr(>|t|):  0.8766875
## chamberA-3   coeff:  0.117783    Pr(>|t|):  0.8474768
## chamberA-4   coeff:  0.4054651   Pr(>|t|):  0.5053931
## chamberB-1   coeff:  0.0918075   Pr(>|t|):  0.8875092
## chamberB-2   coeff:  0.2130932   Pr(>|t|):  0.7267933
## chamberB-3   coeff:  -0.074108   Pr(>|t|):  0.9040261
## chamberB-4   coeff:  0.3254224   Pr(>|t|):  0.6114074

## glm flew_b ~ days_from_start_c binomial data_T1 
## AIC:  448.1122 
## (Intercept)          coeff:  0.4298329   Pr(>|t|):  0.0001338893 *
## days_from_start_c    coeff:  -0.0339142  Pr(>|t|):  0.2615186

## glm flew_b ~ min_from_IncStart binomial data_T1 
## AIC:  449.1128 
## (Intercept)          coeff:  0.360952    Pr(>|t|):  0.03534992 *
## min_from_IncStart    coeff:  0.0004162   Pr(>|t|):  0.6064583

Biological Effects

## glm flew_b ~ mass_c binomial data_T1 
## AIC:  401.7318 
## (Intercept)  coeff:  0.4447371   Pr(>|t|):  0.0002377227 *
## mass_c       coeff:  -33.2650205 Pr(>|t|):  6.742316e-09 *

Morphology Effects

## glm flew_b ~ beak_c binomial data_T1 
## AIC:  438.1289 
## (Intercept)  coeff:  0.4391226   Pr(>|t|):  0.0001235556 *
## beak_c       coeff:  -0.368997   Pr(>|t|):  0.0009363177 *

## glm flew_b ~ thorax_c binomial data_T1 
## AIC:  441.4424 
## (Intercept)  coeff:  0.4374127   Pr(>|t|):  0.0001215489 *
## thorax_c     coeff:  -1.0468489  Pr(>|t|):  0.005435159 *

## glm flew_b ~ body_c binomial data_T1 
## AIC:  447.0143 
## (Intercept)  coeff:  0.4311034   Pr(>|t|):  0.000131644 *
## body_c       coeff:  -0.1613906  Pr(>|t|):  0.1253027

## glm flew_b ~ wing_c binomial data_T1 
## AIC:  449.2264 
## (Intercept)  coeff:  0.4283101   Pr(>|t|):  0.0001371065 *
## wing_c       coeff:  -0.053208   Pr(>|t|):  0.6957995

Glmer() Testing Notes

(1|chamber) as a random effect does not need to be included in the Trial 1 glmer() analyses because in all cases the variance is essentially 0. What pops up if you do try glmer on the best fit model? A singular fit error is raised: “When you obtain a singular fit, this is often indicating that the model is overfitted – that is, the random effects structure is too complex to be supported by the data, which naturally leads to the advice to remove the most complex part of the random effects structure (usually random slopes). The benefit of this approach is that it leads to a more parsimonious (conservative) model that is not over-fitted.” Source: https://stats.stackexchange.com/questions/378939/dealing-with-singular-fit-in-mixed-models

However, (1|population) and (1|days_from_start_c) does matter when doing the morphology analyses in Trial 1, and will not throw an error because the variance is large enough to change the fit of a model.

Without Mass

# Remove any missing masses
data_T1 <- data_T1 %>%
  filter(!is.na(mass))
data_T1 <- center_data(data_T1)

R = data_T1$flew_b
A = data_T1$host_c
B = data_T1$sex_c
C = data_T1$sym_dist
D = data_T1$mass_c 

data<-data.frame(R, A, B, C, D)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")

##        [,1]     [,2]      [,3]      [,4]       [,5]       [,6]      
## AICs   405.1863 407.0964  407.1841  407.7049   408.2027   408.3016  
## models 8        2         11        15         4          6         
## probs  0.337383 0.1298202 0.1242531 0.09576901 0.07466518 0.07106273
## 
## m8   glm(formula = R ~ A * B, family = binomial, data = data)
## m2   glm(formula = R ~ B, family = binomial, data = data)
## m11  glm(formula = R ~ A * B + C, family = binomial, data = data)
## m15  glm(formula = R ~ A * B + B * C, family = binomial, data = data)
## m4   glm(formula = R ~ A + B, family = binomial, data = data)
## m6   glm(formula = R ~ B + C, family = binomial, data = data)

anova(m8, m11, test="Chisq") ## Adding C does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ A * B
## Model 2: R ~ A * B + C
##   Resid. Df Resid. Dev Df  Deviance Pr(>Chi)
## 1       325     397.19                      
## 2       324     397.18  1 0.0022039   0.9626

anova(m11, m15, test="Chisq") ## Adding B*C interaction does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ A * B + C
## Model 2: R ~ A * B + B * C
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       324     397.18                     
## 2       323     395.70  1   1.4792   0.2239

anova(m11, m14, test="Chisq") ## Adding A*C interaction does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ A * B + C
## Model 2: R ~ A * B + A * C
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       324     397.18                     
## 2       323     397.16  1 0.023286   0.8787

nomass_T1<-glm(flew_b~sex_c*host_c, family=binomial, data=data_T1)
tidy_regression(nomass_T1, is_color=output_col)

## glm flew_b ~ sex_c * host_c binomial data_T1 
## AIC:  405.1863 
## (Intercept)      coeff:  0.2373995   Pr(>|t|):  0.07673066 .
## sex_c            coeff:  -0.6108252  Pr(>|t|):  5.260333e-06 *
## host_c           coeff:  -0.0537428  Pr(>|t|):  0.6886482
## sex_c:host_c     coeff:  0.3020053   Pr(>|t|):  0.02434372 *

strong negative effect of sex, where if you are female you are less likely to fly.
No direct effect of host plant
strong positive effect of sex and host where if female and from GRT, then less likely to fly.

With Mass

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 4-FF.R")

##        [,1]       [,2]       [,3]       [,4]      
## AICs   395.8828   396.098    396.5748   397.0146  
## models 51         63         26         18        
## probs  0.09542866 0.08569499 0.06751641 0.05419081
## 
## m51  glm(formula = R ~ A * D + C * D, family = binomial, data = data)
## m63  glm(formula = R ~ A * D + C * D + B, family = binomial, data = data)
## m26  glm(formula = R ~ A * D + B, family = binomial, data = data)
## m18  glm(formula = R ~ A * D, family = binomial, data = data)

R = data_T1$flew_b
A = data_T1$host_c
B = data_T1$sex_c
C = data_T1$mass_c 

data<-data.frame(R, A, B, C)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")

##        [,1]      [,2]      [,3]     [,4]     [,5]      
## AICs   396.5748  397.0146  398.4673 398.5139 398.5501  
## models 12        9         14       11       16        
## probs  0.2662013 0.2136616 0.103341 0.100959 0.09914754
## 
## m12  glm(formula = R ~ A * C + B, family = binomial, data = data)
## m9   glm(formula = R ~ A * C, family = binomial, data = data)
## m14  glm(formula = R ~ A * B + A * C, family = binomial, data = data)
## m11  glm(formula = R ~ A * B + C, family = binomial, data = data)
## m16  glm(formula = R ~ A * C + B * C, family = binomial, data = data)

anova(m9, m12, test="Chisq") # Adding B does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ A * C
## Model 2: R ~ A * C + B
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       325     389.01                     
## 2       324     386.57  1   2.4397   0.1183

anova(m51, m63, test="Chisq") # Adding B does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ A * D + C * D
## Model 2: R ~ A * D + C * D + B
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       323     383.88                     
## 2       322     382.10  1   1.7848   0.1816

anova(m27, m51, test="Chisq") # Adding C*D does improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ A * D + C
## Model 2: R ~ A * D + C * D
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1       324     389.01                       
## 2       323     383.88  1   5.1242  0.02359 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# top model if don't include sym_dist
mass_T1_nosym <- glm(flew_b~host_c*mass_c, family=binomial, data=data_T1)
tidy_regression(mass_T1_nosym, is_color=output_col)

## glm flew_b ~ host_c * mass_c binomial data_T1 
## AIC:  397.0146 
## (Intercept)      coeff:  0.3972549   Pr(>|t|):  0.002534805 *
## host_c           coeff:  -0.1665608  Pr(>|t|):  0.2055597
## mass_c           coeff:  -28.0261972 Pr(>|t|):  4.143597e-06 *
## host_c:mass_c    coeff:  15.6704317  Pr(>|t|):  0.01004477 *

# top model if you do include sym_dist
mass_T1_sym<- glm(flew_b~host_c*mass_c + sym_dist*mass_c, family=binomial, data=data_T1)
tidy_regression(mass_T1_sym, is_color=output_col)

## glm flew_b ~ host_c * mass_c + sym_dist * mass_c binomial data_T1 
## AIC:  395.8828 
## (Intercept)      coeff:  0.4352287   Pr(>|t|):  0.04304544 *
## host_c           coeff:  -0.1640368  Pr(>|t|):  0.3508711
## mass_c           coeff:  -15.1464735 Pr(>|t|):  0.07961529 .
## sym_dist         coeff:  -0.1084774  Pr(>|t|):  0.5151625
## host_c:mass_c    coeff:  22.7792912  Pr(>|t|):  0.001430864 *
## mass_c:sym_dist  coeff:  -17.3605309 Pr(>|t|):  0.04408037 *

Morphology

data_T1 <-data_tested[data_tested$trial_type=="T1",]
data_T1 <- data_T1 %>%
  filter(!is.na(body))
data_T1 <- center_data(data_T1)

R = data_T1$flew_b
A = data_T1$thorax_c
B = data_T1$body_c
C = data_T1$wing_c 

data<-data.frame(R, A, B, C)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")

##        [,1]      [,2]      [,3]      [,4]      [,5]       [,6]       [,7]      
## AICs   420.4885  421.1253  421.4696  421.5025  421.8375   421.9204   422.082   
## models 13        15        7         12        14         16         11        
## probs  0.1864269 0.1355936 0.1141476 0.1122827 0.09496689 0.09111367 0.08404131
##        [,8]      
## AICs   422.6621  
## models 17        
## probs  0.06288207
## 
## m13  glm(formula = R ~ B * C + A, family = binomial, data = data)
## m15  glm(formula = R ~ A * B + B * C, family = binomial, data = data)
## m7   glm(formula = R ~ A + B + C, family = binomial, data = data)
## m12  glm(formula = R ~ A * C + B, family = binomial, data = data)
## m14  glm(formula = R ~ A * B + A * C, family = binomial, data = data)
## m16  glm(formula = R ~ A * C + B * C, family = binomial, data = data)
## m11  glm(formula = R ~ A * B + C, family = binomial, data = data)
## m17  glm(formula = R ~ A * B + A * C + B * C, family = binomial, data = data)

anova(m7, m13, test="Chisq") # Adding B*C marginally improves fit

## Analysis of Deviance Table
## 
## Model 1: R ~ A + B + C
## Model 2: R ~ B * C + A
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1       328     413.47                       
## 2       327     410.49  1   2.9811  0.08424 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m13, m16, test="Chisq") # Adding A*C does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ B * C + A
## Model 2: R ~ A * C + B * C
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       327     410.49                     
## 2       326     409.92  1  0.56814    0.451

morph_T1 <- glm(flew_b ~ body_c * wing_c + thorax_c, family = binomial, data = data_T1)
tidy_regression(morph_T1, is_color=output_col)

## glm flew_b ~ body_c * wing_c + thorax_c binomial data_T1 
## AIC:  420.4885 
## (Intercept)      coeff:  0.6086248   Pr(>|t|):  2.775207e-05 *
## body_c           coeff:  -1.1762506  Pr(>|t|):  0.06266837 .
## wing_c           coeff:  2.350309    Pr(>|t|):  0.0004808155 *
## thorax_c         coeff:  -2.6772762  Pr(>|t|):  0.01747959 *
## body_c:wing_c    coeff:  -0.1698641  Pr(>|t|):  0.08756481 .

maringal negative effect of body wher ethe larger the body the less likely to fly
strong positive effect of wing length where the longer the wing, the more likely to fly
storng negative effect of thorax where the larger the thorasxs, the less likely to fly
no effect of body*wing (we know body and wing are closely related so they could be canceling each other out (b/c body is a proxi for mass))

morph_T1glmer <- glmer(formula = flew_b ~  body_c * wing_c + thorax_c + (1| population), family = binomial, data = data_T1) 
tidy_regression(morph_T1glmer, is_color=output_col) # did change the coefficients slightly, but not a better fit

## glmer flew_b ~ body_c * wing_c + thorax_c + (1 | population) data_T1 binomial 
## AIC:  421.8532 421.8532 
## (Intercept)      coeff:  0.5335423   Pr(>|t|):  0.005217949 *
## body_c           coeff:  -1.2372535  Pr(>|t|):  0.05374932 .
## wing_c           coeff:  2.4704559   Pr(>|t|):  0.0003676219 *
## thorax_c         coeff:  -2.792138   Pr(>|t|):  0.01517357 *
## body_c:wing_c    coeff:  -0.1651854  Pr(>|t|):  0.1002469

R = data_T1$flew_b
A = data_T1$thorax_c
B = data_T1$wing2body_c

data<-data.frame(R, A, B)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 2-FF.R")

##        [,1]     [,2]     
## AICs   419.9404 420.2258 
## models 4        3        
## probs  0.531513 0.4608213
## 
## m4   glm(formula = R ~ A * B, family = binomial, data = data)
## m3   glm(formula = R ~ A + B, family = binomial, data = data)

anova(m3, m4, test="Chisq") # Adding A*B does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ A + B
## Model 2: R ~ A * B
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       329     414.23                     
## 2       328     411.94  1   2.2854   0.1306

morph_T1_ratio <- glm(flew_b ~ wing2body_c + thorax_c, family = binomial, data = data_T1)
tidy_regression(morph_T1_ratio, is_color=output_col)

## glm flew_b ~ wing2body_c + thorax_c binomial data_T1 
## AIC:  420.2258 
## (Intercept)  coeff:  0.4670867   Pr(>|t|):  8.08552e-05 *
## wing2body_c  coeff:  32.6671543  Pr(>|t|):  6.925235e-06 *
## thorax_c     coeff:  -1.2219033  Pr(>|t|):  0.001695651 *

Summary

tidy_regression(nomass_T1, is_color=output_col)

## glm flew_b ~ sex_c * host_c binomial data_T1 
## AIC:  405.1863 
## (Intercept)      coeff:  0.2373995   Pr(>|t|):  0.07673066 .
## sex_c            coeff:  -0.6108252  Pr(>|t|):  5.260333e-06 *
## host_c           coeff:  -0.0537428  Pr(>|t|):  0.6886482
## sex_c:host_c     coeff:  0.3020053   Pr(>|t|):  0.02434372 *

tidy_regression(mass_T1_sym, is_color=output_col)

## glm flew_b ~ host_c * mass_c + sym_dist * mass_c binomial data_T1 
## AIC:  395.8828 
## (Intercept)      coeff:  0.4352287   Pr(>|t|):  0.04304544 *
## host_c           coeff:  -0.1640368  Pr(>|t|):  0.3508711
## mass_c           coeff:  -15.1464735 Pr(>|t|):  0.07961529 .
## sym_dist         coeff:  -0.1084774  Pr(>|t|):  0.5151625
## host_c:mass_c    coeff:  22.7792912  Pr(>|t|):  0.001430864 *
## mass_c:sym_dist  coeff:  -17.3605309 Pr(>|t|):  0.04408037 *

tidy_regression(mass_T1_nosym, is_color=output_col)

## glm flew_b ~ host_c * mass_c binomial data_T1 
## AIC:  397.0146 
## (Intercept)      coeff:  0.3972549   Pr(>|t|):  0.002534805 *
## host_c           coeff:  -0.1665608  Pr(>|t|):  0.2055597
## mass_c           coeff:  -28.0261972 Pr(>|t|):  4.143597e-06 *
## host_c:mass_c    coeff:  15.6704317  Pr(>|t|):  0.01004477 *

tidy_regression(morph_T1, is_color=output_col) # glmer did not improve fit

## glm flew_b ~ body_c * wing_c + thorax_c binomial data_T1 
## AIC:  420.4885 
## (Intercept)      coeff:  0.6086248   Pr(>|t|):  2.775207e-05 *
## body_c           coeff:  -1.1762506  Pr(>|t|):  0.06266837 .
## wing_c           coeff:  2.350309    Pr(>|t|):  0.0004808155 *
## thorax_c         coeff:  -2.6772762  Pr(>|t|):  0.01747959 *
## body_c:wing_c    coeff:  -0.1698641  Pr(>|t|):  0.08756481 .

tidy_regression(morph_T1_ratio, is_color=output_col)

## glm flew_b ~ wing2body_c + thorax_c binomial data_T1 
## AIC:  420.2258 
## (Intercept)  coeff:  0.4670867   Pr(>|t|):  8.08552e-05 *
## wing2body_c  coeff:  32.6671543  Pr(>|t|):  6.925235e-06 *
## thorax_c     coeff:  -1.2219033  Pr(>|t|):  0.001695651 *

Trial 2

Testing glmer() with pop and chamber

Cleaning the Data

rm(list=ls())
output_col = FALSE 
source("src/clean_flight_data.R") 
source("src/regression_output.R") 
source("src/center_flight_data.R")
source("get_warnings.R")

data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]

data_T2 <-data_tested[data_tested$trial_type=="T2",]
data_T2 <- center_data(data_T2)

Experimental Set-Up Effects

## glm flew_b ~ chamber binomial data_T2 
## AIC:  393.888 
## (Intercept)  coeff:  -0.0909718  Pr(>|t|):  0.7631039
## chamberA-2   coeff:  0.3273606   Pr(>|t|):  0.4754205
## chamberA-3   coeff:  0.784119    Pr(>|t|):  0.1224819
## chamberA-4   coeff:  -0.2837217  Pr(>|t|):  0.488549
## chamberB-1   coeff:  0.784119    Pr(>|t|):  0.1559208
## chamberB-2   coeff:  0.6017974   Pr(>|t|):  0.2039747
## chamberB-3   coeff:  -0.468644   Pr(>|t|):  0.3199648
## chamberB-4   coeff:  -0.1809619  Pr(>|t|):  0.6866405

## glm flew_b ~ days_from_start_c binomial data_T2 
## AIC:  393.3056 
## (Intercept)          coeff:  0.014415    Pr(>|t|):  0.9039395
## days_from_start_c    coeff:  -0.0540142  Pr(>|t|):  0.2070645

## glm flew_b ~ min_from_IncStart binomial data_T2 
## AIC:  394.6235 
## (Intercept)          coeff:  -0.0779168  Pr(>|t|):  0.7062456
## min_from_IncStart    coeff:  0.0004647   Pr(>|t|):  0.5857749

Biological Effects

## glm flew_b ~ mass_c binomial data_T2 
## AIC:  358.9575 
## (Intercept)  coeff:  -0.0288154  Pr(>|t|):  0.8227357
## mass_c       coeff:  -33.6659527 Pr(>|t|):  1.030843e-07 *

Morphology Effects

## glm flew_b ~ beak_c binomial data_T2 
## AIC:  380.4646 
## (Intercept)  coeff:  0.0069373   Pr(>|t|):  0.9547796
## beak_c       coeff:  -0.468637   Pr(>|t|):  0.0002555501 *

## glm flew_b ~ thorax_c binomial data_T2 
## AIC:  381.7558 
## (Intercept)  coeff:  0.0109082   Pr(>|t|):  0.9287235
## thorax_c     coeff:  -1.49497    Pr(>|t|):  0.0004698825 *

## glm flew_b ~ body_c binomial data_T2 
## AIC:  387.3314 
## (Intercept)  coeff:  0.0133229   Pr(>|t|):  0.9121223
## body_c       coeff:  -0.3168907  Pr(>|t|):  0.006959575 *

## glm flew_b ~ wing_c binomial data_T2 
## AIC:  392.1204 
## (Intercept)  coeff:  0.014232    Pr(>|t|):  0.905352
## wing_c       coeff:  -0.2472874  Pr(>|t|):  0.09699422 .

Glmer() Testing Notes

(1|population) + (1|chamber) as random effects do not need to be included in the Trial 2 glmer() mass and no mass analyses because in all cases the variance is essentially 0. What pops up if you do try glmer on the best fit model? A singular fit error is raised. Refer to the reasoning in Trial 1.

However, no error is thrown when included in the morphology analyses.

Without Mass

R = data_T2$flew_b
A = data_T2$host_c
B = data_T2$sex_c
C = data_T2$sym_dist
D = data_T2$mass_c # as a covariate 

data<-data.frame(R, A, B, C, D)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")

##        [,1]      [,2]      [,3]     [,4]       [,5]       [,6]      
## AICs   361.1644  362.5404  363.0369 364.4845   364.5336   364.8156  
## models 2         4         6        7          8          10        
## probs  0.3641601 0.1830193 0.142783 0.06923625 0.06756002 0.05867243
## 
## m2   glm(formula = R ~ B, family = binomial, data = data)
## m4   glm(formula = R ~ A + B, family = binomial, data = data)
## m6   glm(formula = R ~ B + C, family = binomial, data = data)
## m7   glm(formula = R ~ A + B + C, family = binomial, data = data)
## m8   glm(formula = R ~ A * B, family = binomial, data = data)
## m10  glm(formula = R ~ B * C, family = binomial, data = data)

anova(m2, m4, test="Chisq") # Adding A does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ B
## Model 2: R ~ A + B
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       280     357.16                     
## 2       279     356.54  1    0.624   0.4296

anova(m2, m6, test="Chisq") # Adding C does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ B
## Model 2: R ~ B + C
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       280     357.16                     
## 2       279     357.04  1  0.12746   0.7211

nomass_T2 <-glm(flew_b~sex_c, family=binomial, data=data_T2)
tidy_regression(nomass_T2, is_color=output_col)

## glm flew_b ~ sex_c binomial data_T2 
## AIC:  361.1644 
## (Intercept)  coeff:  -0.2425498  Pr(>|t|):  0.07779975 .
## sex_c        coeff:  -0.7620335  Pr(>|t|):  3.010899e-08 *

sex is the only significant and strong effect, so that if you are female you are much less likely to disperse/fly

With Mass

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 4-FF.R")

##        [,1]       [,2]       [,3]      [,4]      
## AICs   358.8454   358.9575   359.2153  359.563   
## models 9          4          7         12        
## probs  0.08891803 0.08406952 0.0739026 0.06210944
## 
## m9   glm(formula = R ~ B + D, family = binomial, data = data)
## m4   glm(formula = R ~ D, family = binomial, data = data)
## m7   glm(formula = R ~ A + D, family = binomial, data = data)
## m12  glm(formula = R ~ A + B + D, family = binomial, data = data)

anova(m4, m7, test="Chisq") # Adding A does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ D
## Model 2: R ~ A + D
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       280     354.96                     
## 2       279     353.22  1   1.7422   0.1869

anova(m7, m12, test="Chisq") # Adding B does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ A + D
## Model 2: R ~ A + B + D
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       279     353.22                     
## 2       278     351.56  1   1.6523   0.1986

anova(m4, m9, test="Chisq") # Adding B does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ D
## Model 2: R ~ B + D
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       280     354.96                     
## 2       279     352.85  1   2.1121   0.1461

mass_T2 <-glm(flew_b~mass_c, family=binomial, data=data_T2)
tidy_regression(mass_T2, is_color=output_col)

## glm flew_b ~ mass_c binomial data_T2 
## AIC:  358.9575 
## (Intercept)  coeff:  -0.0288154  Pr(>|t|):  0.8227357
## mass_c       coeff:  -33.6659527 Pr(>|t|):  1.030843e-07 *

mass is the only significant and extremely strong effect, so that if you are heavy you are much less likely to disperse/fly

Mass is really dominating everything, so let’s split by sex after looking at Trial 2.

Morphology

# Remove any missing masses and morphology measurements
data_T2 <- data_T2 %>%
  filter(!is.na(mass))  %>%
  filter(!is.na(body))

data_T2 <- center_data(data_T2)

R = data_T2$flew_b
A = data_T2$thorax_c
B = data_T2$body_c
C = data_T2$wing_c 

data<-data.frame(R, A, B, C)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")

##        [,1]      [,2]      [,3]      [,4]      
## AICs   364.316   365.2947  366.2914  368.3788  
## models 16        15        17        13        
## probs  0.4230217 0.2593179 0.1575417 0.05547893
## 
## m16  glm(formula = R ~ A * C + B * C, family = binomial, data = data)
## m15  glm(formula = R ~ A * B + B * C, family = binomial, data = data)
## m17  glm(formula = R ~ A * B + A * C + B * C, family = binomial, data = data)
## m13  glm(formula = R ~ B * C + A, family = binomial, data = data)

anova(m13, m16, test="Chisq") # Adding A*C improves fit

## Analysis of Deviance Table
## 
## Model 1: R ~ B * C + A
## Model 2: R ~ A * C + B * C
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1       277     358.38                       
## 2       276     352.32  1   6.0628  0.01381 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

morph_T2 <- glm(flew_b ~ thorax_c * wing_c + body_c * wing_c, family = binomial, data = data_T2)
tidy_regression(morph_T2, is_color=output_col)

## glm flew_b ~ thorax_c * wing_c + body_c * wing_c binomial data_T2 
## AIC:  364.316 
## (Intercept)      coeff:  0.1914188   Pr(>|t|):  0.2140046
## thorax_c         coeff:  -2.3082152  Pr(>|t|):  0.0828982 .
## wing_c           coeff:  1.9063552   Pr(>|t|):  0.008518188 *
## body_c           coeff:  -1.1472691  Pr(>|t|):  0.09990423 .
## thorax_c:wing_c  coeff:  3.692847    Pr(>|t|):  0.0199121 *
## wing_c:body_c    coeff:  -1.216138   Pr(>|t|):  0.006636944 *

negative marginal effect of thorax, where the wider the thorax, the then bug less likely to fly
positive effect of wing length, where if wing longer then bug more likely to fly
negative effect of body, where the longer the body the less likely the bug is to fly
positive effect of thorax:wing where the wider the thorax and wing the more likely the bug is to fly (this encapsulates overall flight power in a way b/c thorax muscles and wing length)
negative effect of wing and body where the longer the wing and body the less likely the bug is to fly (this encapsulates mass limitations)

R = data_T2$flew_b
A = data_T2$thorax_c
B = data_T2$wing2body_c

data<-data.frame(R, A, B)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 2-FF.R")

##        [,1]      [,2]     
## AICs   367.6219  368.6156 
## models 3         4        
## probs  0.6206869 0.3776612
## 
## m3   glm(formula = R ~ A + B, family = binomial, data = data)
## m4   glm(formula = R ~ A * B, family = binomial, data = data)

anova(m3, m4, test="Chisq") # Adding A*B does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ A + B
## Model 2: R ~ A * B
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       279     361.62                     
## 2       278     360.62  1   1.0063   0.3158

morph_T2_ratio <- glm(flew_b ~ thorax_c + wing2body_c, family = binomial, data = data_T2)
tidy_regression(morph_T2_ratio, is_color=output_col)

## glm flew_b ~ thorax_c + wing2body_c binomial data_T2 
## AIC:  367.6219 
## (Intercept)  coeff:  0.0109675   Pr(>|t|):  0.9303663
## thorax_c     coeff:  -1.6132431  Pr(>|t|):  0.0002366193 *
## wing2body_c  coeff:  28.5706239  Pr(>|t|):  0.0001653885 *

Summary

tidy_regression(nomass_T2, is_color=output_col)

## glm flew_b ~ sex_c binomial data_T2 
## AIC:  361.1644 
## (Intercept)  coeff:  -0.2425498  Pr(>|t|):  0.07779975 .
## sex_c        coeff:  -0.7620335  Pr(>|t|):  3.010899e-08 *

tidy_regression(mass_T2, is_color=output_col)

## glm flew_b ~ mass_c binomial data_T2 
## AIC:  358.9575 
## (Intercept)  coeff:  -0.0288154  Pr(>|t|):  0.8227357
## mass_c       coeff:  -33.6659527 Pr(>|t|):  1.030843e-07 *

tidy_regression(morph_T2, is_color=output_col) # glmer did not improve fit

## glm flew_b ~ thorax_c * wing_c + body_c * wing_c binomial data_T2 
## AIC:  364.316 
## (Intercept)      coeff:  0.1914188   Pr(>|t|):  0.2140046
## thorax_c         coeff:  -2.3082152  Pr(>|t|):  0.0828982 .
## wing_c           coeff:  1.9063552   Pr(>|t|):  0.008518188 *
## body_c           coeff:  -1.1472691  Pr(>|t|):  0.09990423 .
## thorax_c:wing_c  coeff:  3.692847    Pr(>|t|):  0.0199121 *
## wing_c:body_c    coeff:  -1.216138   Pr(>|t|):  0.006636944 *

tidy_regression(morph_T2_ratio, is_color=output_col)

## glm flew_b ~ thorax_c + wing2body_c binomial data_T2 
## AIC:  367.6219 
## (Intercept)  coeff:  0.0109675   Pr(>|t|):  0.9303663
## thorax_c     coeff:  -1.6132431  Pr(>|t|):  0.0002366193 *
## wing2body_c  coeff:  28.5706239  Pr(>|t|):  0.0001653885 *

Females

Eggs Modeling

Cleaning the Data

rm(list=ls())
output_col = FALSE 
source("src/clean_flight_data.R") 
source("src/regression_output.R") 
source("src/center_flight_data.R")
source("get_warnings.R")

data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]

d <- data_tested %>%
   group_by(ID, sex,population, site, host_plant, latitude, longitude, total_eggs, 
            beak, thorax, wing, body, w_morph, morph_notes, tested,
            host_c, sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, 
            beak_c, thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c, wing2body_s) %>%
   summarise_all(funs(list(na.omit(.))))

d$num_flew <- 0
d$num_notflew <- 0
d$average_mass <- 0

for(row in 1:length(d$flew_b)){
  
  n_flew <- sum(d$flew_b[[row]] == 1) # total number of times did fly among trails
  d$num_flew[[row]] <- n_flew 
  
  n_notflew <- sum(d$flew_b[[row]] == 0) # total number of times did not fly among trails
  d$num_notflew[[row]] <- n_notflew
  
  avg_mass <- mean(d$mass[[row]])
  d$average_mass[[row]] <- avg_mass

}

d <- select(d, -filename, -channel_letter, -set_number)

data_fem <- d[d$sex=="F",]
data_fem <- center_data(data_fem, is_not_binded = FALSE)
data_fem

## # A tibble: 119 x 63
## # Groups:   ID, sex, population, site, host_plant, latitude, longitude,
## #   total_eggs, beak, thorax, wing, body, w_morph, morph_notes, tested, host_c,
## #   sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, beak_c,
## #   thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c [119]
##    ID    sex   population site  host_plant latitude longitude total_eggs  beak
##    <fct> <fct> <fct>      <fct> <fct>         <dbl>     <dbl>      <dbl> <dbl>
##  1 5     F     Plantatio… Areg… C. corind…     25.0     -80.6        209  7.47
##  2 7     F     Plantatio… Foun… C. corind…     25.0     -80.6          8  8.19
##  3 8     F     Plantatio… Foun… C. corind…     25.0     -80.6         92  8.39
##  4 12    F     Key Largo  KLMRL C. corind…     25.1     -80.4        329  8.79
##  5 13    F     Key Largo  KLMRL C. corind…     25.1     -80.4        296  6.99
##  6 16    F     Key Largo  JP    C. corind…     25.1     -80.4          1  6.33
##  7 19    F     Key Largo  JP    C. corind…     25.1     -80.4         NA  7.54
##  8 20    F     Key Largo  JP    C. corind…     25.1     -80.4         55  8.73
##  9 25    F     North Key… DD f… C. corind…     25.3     -80.3         NA  8.03
## 10 26    F     North Key… Char… C. corind…     25.2     -80.3         33  7.15
## # … with 109 more rows, and 54 more variables: thorax <dbl>, wing <dbl>,
## #   body <dbl>, w_morph <fct>, morph_notes <fct>, tested <fct>, host_c <dbl>,
## #   sex_c <dbl>, w_morph_c <dbl>, lat_c <dbl>, sym_dist <dbl>,
## #   sym_dist_s <dbl>, total_eggs_c <dbl>, beak_c <dbl>, thorax_c <dbl>,
## #   thorax_s <dbl>, body_c <dbl>, wing_c <dbl>, wing2body <dbl>,
## #   wing2body_c <dbl>, wing2body_s <dbl>, trial_type <list>, chamber <list>,
## #   average_speed <list>, total_flight_time <list>, distance <list>,
## #   shortest_flying_bout <list>, longest_flying_bout <list>,
## #   total_duration <list>, max_speed <list>, test_date <list>,
## #   time_start <list>, time_end <list>, flew <list>, flight_type <list>,
## #   mass <list>, EWM <list>, flew_b <list>, eggs_b <list>,
## #   minute_duration <list>, minute_duration_c <list>, min_from_IncStart <dbl>,
## #   min_from_IncStart_c <dbl>, min_from_IncStart_s <dbl>,
## #   days_from_start <list>, days_from_start_c <list>, mass_c <list>,
## #   mass_s <list>, average_mass <dbl>, average_mass_c <dbl>,
## #   average_mass_s <dbl>, trial_type_og <list>, num_flew <dbl>,
## #   num_notflew <dbl>

Experimental, Biological, and Morphological Effects

## glm cbind(num_flew, num_notflew) ~ average_mass binomial data_fem 
## AIC:  244.0145 
## (Intercept)      coeff:  0.8199527   Pr(>|t|):  0.1567534
## average_mass     coeff:  -19.7615291 Pr(>|t|):  0.007607435 *

## glm cbind(num_flew, num_notflew) ~ total_eggs binomial data_fem 
## AIC:  180.1734 
## (Intercept)  coeff:  -0.000878   Pr(>|t|):  0.9971877
## total_eggs   coeff:  -0.0113041  Pr(>|t|):  3.560437e-05 *

## glm cbind(num_flew, num_notflew) ~ beak_c binomial data_fem 
## AIC:  243.4425 
## (Intercept)  coeff:  -0.7413431  Pr(>|t|):  7.132613e-07 *
## beak_c       coeff:  0.5176271   Pr(>|t|):  0.004779854 *

## glm cbind(num_flew, num_notflew) ~ thorax_c binomial data_fem 
## AIC:  249.8961 
## (Intercept)  coeff:  -0.7144045  Pr(>|t|):  9.943685e-07 *
## thorax_c     coeff:  0.7277155   Pr(>|t|):  0.1679599

## glm cbind(num_flew, num_notflew) ~ body_c binomial data_fem 
## AIC:  245.9068 
## (Intercept)  coeff:  -0.7328335  Pr(>|t|):  7.928047e-07 *
## body_c       coeff:  0.3564223   Pr(>|t|):  0.01817676 *

## glm cbind(num_flew, num_notflew) ~ wing_c binomial data_fem 
## AIC:  245.7877 
## (Intercept)  coeff:  -0.7311905  Pr(>|t|):  8.381109e-07 *
## wing_c       coeff:  0.4265058   Pr(>|t|):  0.01764792 *

Without Mass

R1 = data_fem$num_flew
R2 = data_fem$num_notflew
A = data_fem$host_c
B = data_fem$wing_c
C = data_fem$average_mass 
X = data_fem$population # can't do trial type anymore

data<-data.frame(R1, R2, A, B, C, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 2-FF.R"))

##        [,1]      [,2]      [,3]      [,4]      
## AICs   247.7877  248.9599  249.678   251.8143  
## models 2         4         3         5         
## probs  0.4678333 0.2603472 0.1818148 0.06247934
## 
## m2   cbind(R1, R2) ~ B + (1 | X)
## m4   cbind(R1, R2) ~ A * B + (1 | X)
## m3   cbind(R1, R2) ~ A + B + (1 | X)
## m0   cbind(R1, R2) ~ 1 + (1 | X)

length(errors$warnings)

## [1] 0

anova(m0, m2, test="Chisq") # Adding B improves fit

## Data: data
## Models:
## m0: cbind(R1, R2) ~ 1 + (1 | X)
## m2: cbind(R1, R2) ~ B + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m0  2 251.81 257.37 -123.91   247.81                           
## m2  3 247.79 256.12 -120.89   241.79 6.0266      1    0.01409 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m2, m3, test="Chisq") # Adding A does not improve fit

## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m3: cbind(R1, R2) ~ A + B + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m2  3 247.79 256.12 -120.89   241.79                         
## m3  4 249.68 260.79 -120.84   241.68 0.1098      1     0.7404

nomass_fem <- glmer(cbind(num_flew, num_notflew) ~ wing_c + (1|population), data=data_fem, family=binomial)

## boundary (singular) fit: see ?isSingular

tidy_regression(nomass_fem, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing_c + (1 | population) data_fem binomial 
## AIC:  247.7877 247.7877 
## (Intercept)  coeff:  -0.7311905  Pr(>|t|):  8.382439e-07 *
## wing_c       coeff:  0.4265058   Pr(>|t|):  0.01764923 *

positive effect of wing length where the longer the wing the more times the bug will fly

With Mass

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))

##        [,1]      [,2]      [,3]     [,4]      [,5]      [,6]      [,7]      
## AICs   230.6699  231.793   231.8642 232.3348  232.6675  233.7873  233.8316  
## models 12        14        11       6         16        17        15        
## probs  0.2649127 0.1510885 0.145803 0.1152337 0.0975719 0.0557406 0.05451835
##        [,8]      
## AICs   233.906   
## models 7         
## probs  0.05252892
## 
## m12  cbind(R1, R2) ~ A * C + B + (1 | X)
## m14  cbind(R1, R2) ~ A * B + A * C + (1 | X)
## m11  cbind(R1, R2) ~ A * B + C + (1 | X)
## m6   cbind(R1, R2) ~ B + C + (1 | X)
## m16  cbind(R1, R2) ~ A * C + B * C + (1 | X)
## m17  cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## m15  cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m7   cbind(R1, R2) ~ A + B + C + (1 | X)

length(errors$warnings)

## [1] 0

anova(m7, m12, test="Chisq") # Adding A*C improves fit

## Data: data
## Models:
## m7: cbind(R1, R2) ~ A + B + C + (1 | X)
## m12: cbind(R1, R2) ~ A * C + B + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m7   5 233.91 247.80 -111.95   223.91                           
## m12  6 230.67 247.34 -109.33   218.67 5.2361      1    0.02212 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

mass_fem <- glmer(cbind(num_flew, num_notflew) ~ host_c * average_mass + wing_c + (1|population), data=data_fem, family=binomial)

## boundary (singular) fit: see ?isSingular

tidy_regression(mass_fem, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ host_c * average_mass + wing_c + (1 | population) data_fem binomial 
## AIC:  230.6699 230.6699 
## (Intercept)          coeff:  1.0497932   Pr(>|t|):  0.1635618
## host_c               coeff:  -1.6597665  Pr(>|t|):  0.01871997 *
## average_mass         coeff:  -22.9687138 Pr(>|t|):  0.02197948 *
## wing_c               coeff:  0.8179845   Pr(>|t|):  0.0001948365 *
## host_c:average_mass  coeff:  21.0529026  Pr(>|t|):  0.0219996 *

negative effect of host where if from GRT the less times a bug will fly
strong negative effect of average mass where the heavier the less times a bug will fly
positive effect of wing length where the longer the wing the more times the bug will fly
strong positive effect of host*average mass where the heavier the bug and from GRT the more times the bug will fly (I’ve seen these strong conflicting interactions a lot with host and something else)

# check for collinearity between mass and wing length
data<-data.frame(data_fem$host_c, data_fem$wing_c, data_fem$average_mass )
colnames(data) <- c("Host Plant", "Wing Length", "Average Mass")
pairs(data)

GRT bugs weigh less.

Eggs

R1 = data_fem$num_flew
R2 = data_fem$num_notflew
A = data_fem$host_c
B = data_fem$wing_c
C = data_fem$average_mass 
D = data_fem$total_eggs_c # can't use eggs_b anymore
X = data_fem$population 

data<-data.frame(R1, R2, A, B, C, D, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1-RF + 4-FF.R"))

##        [,1]       [,2]      
## AICs   177.1075   177.3402  
## models 9          20        
## probs  0.09166895 0.08160042
## 
## m9   cbind(R1, R2) ~ B + D + (1 | X)
## m20  cbind(R1, R2) ~ B * D + (1 | X)

length(errors$warnings) # 83 models failed

## [1] 83

anova(m9, m20, test="Chisq") # Adding B*D does not improve fit

## Data: data
## Models:
## m9: cbind(R1, R2) ~ B + D + (1 | X)
## m20: cbind(R1, R2) ~ B * D + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m9   4 177.11 187.65 -84.554   169.11                         
## m20  5 177.34 190.51 -83.670   167.34 1.7673      1     0.1837

egg_model <- glmer(cbind(num_flew, num_notflew) ~ wing_c + total_eggs_c + (1|population), data=data_fem, family=binomial)

## boundary (singular) fit: see ?isSingular

tidy_regression(egg_model, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing_c + total_eggs_c + (1 | population) data_fem binomial 
## AIC:  177.1075 177.1075 
## (Intercept)      coeff:  -1.1895793  Pr(>|t|):  5.069879e-09 *
## wing_c           coeff:  0.5576031   Pr(>|t|):  0.01181402 *
## total_eggs_c     coeff:  -0.0110275  Pr(>|t|):  4.517167e-05 *

positive effect of wing length
negative effect of total eggs

Morphology

d <- data_fem %>%
  filter(!is.na(body))
d$thorax_c <- d$thorax - mean(d$thorax)
d$wing_c <- d$wing - mean(d$wing)
d$body_c <- d$body - mean(d$body)

R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$body_c
C = d$wing_c 
X = d$population

data<-data.frame(R1, R2, A, B, C, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))

##        [,1]      [,2]      [,3]      [,4]       [,5]       [,6]      
## AICs   244.8068  245.5373  246.3075  246.7777   247.4988   247.752   
## models 16        15        4         17         5          13        
## probs  0.2213596 0.1536284 0.1045263 0.08262519 0.05761315 0.05076343
## 
## m16  cbind(R1, R2) ~ A * C + B * C + (1 | X)
## m15  cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m4   cbind(R1, R2) ~ A + B + (1 | X)
## m17  cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## m5   cbind(R1, R2) ~ A + C + (1 | X)
## m13  cbind(R1, R2) ~ B * C + A + (1 | X)

length(errors$warnings)

## [1] 0

anova(m15, m16, test="Chisq") # replacing A*B with A*C improves fit

## Data: data
## Models:
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m16: cbind(R1, R2) ~ A * C + B * C + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## m15  7 245.54 264.99 -115.77   231.54                             
## m16  7 244.81 264.26 -115.40   230.81 0.7305      0  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m13, m16, test="Chisq") # Adding B*C improves fit

## Data: data
## Models:
## m13: cbind(R1, R2) ~ B * C + A + (1 | X)
## m16: cbind(R1, R2) ~ A * C + B * C + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m13  6 247.75 264.43 -117.88   235.75                           
## m16  7 244.81 264.26 -115.40   230.81 4.9452      1    0.02616 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

morph_fem <- glmer(cbind(num_flew, num_notflew) ~ thorax_c * wing_c + body_c * wing_c + (1 | population), data=d, family=binomial)

## boundary (singular) fit: see ?isSingular

tidy_regression(morph_fem, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing_c + body_c * wing_c + (1 | population) d binomial 
## AIC:  244.8068 244.8068 
## (Intercept)      coeff:  -0.5721164  Pr(>|t|):  0.00361911 *
## thorax_c         coeff:  -3.9771602  Pr(>|t|):  0.02124539 *
## wing_c           coeff:  -0.1206764  Pr(>|t|):  0.896578
## body_c           coeff:  1.5567268   Pr(>|t|):  0.1033466
## thorax_c:wing_c  coeff:  4.5495185   Pr(>|t|):  0.03910877 *
## wing_c:body_c    coeff:  -1.5315868  Pr(>|t|):  0.02345094 *

strong negative effect of thorax where the wider the thorax, the less times the bug flies
no effect of wing length
no effect of body length
strong positive (marginal) effect of thorax*body where the longer the body and wider the thorax, the more times a bug flies (seems to conflict the single effects)
negative effect of of body*wing where the longer the body and wing, the less times the bug flies

R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
X = d$population

data<-data.frame(R1, R2, A, B, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 2-FF.R"))

##        [,1]      [,2]      [,3]      [,4]      [,5]     
## AICs   250.0524  250.5642  251.7468  251.8143  251.8961 
## models 2         4         3         5         1        
## probs  0.3316744 0.2567869 0.1421605 0.1374432 0.1319351
## 
## m2   cbind(R1, R2) ~ B + (1 | X)
## m4   cbind(R1, R2) ~ A * B + (1 | X)
## m3   cbind(R1, R2) ~ A + B + (1 | X)
## m0   cbind(R1, R2) ~ 1 + (1 | X)
## m1   cbind(R1, R2) ~ A + (1 | X)

length(errors$warnings)

## [1] 0

anova(m0, m2, test="Chisq") # Adding B marginally improves fit

## Data: data
## Models:
## m0: cbind(R1, R2) ~ 1 + (1 | X)
## m2: cbind(R1, R2) ~ B + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m0  2 251.81 257.37 -123.91   247.81                           
## m2  3 250.05 258.39 -122.03   244.05 3.7619      1    0.05243 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m2, m3, test="Chisq") # Adding B does not improve fit

## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m3: cbind(R1, R2) ~ A + B + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m2  3 250.05 258.39 -122.03   244.05                         
## m3  4 251.75 262.86 -121.87   243.75 0.3056      1     0.5804

morph_fem2 <- glmer(cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population), data=d, family=binomial)

## boundary (singular) fit: see ?isSingular

tidy_regression(morph_fem2, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population) d binomial 
## AIC:  250.0524 250.0524 
## (Intercept)  coeff:  -0.7145386  Pr(>|t|):  1.121718e-06 *
## wing2body_c  coeff:  16.3816527  Pr(>|t|):  0.06120233 .

marginal strong positive effect of wing:body where the longer the body and heavier the body, the more times a female bug flew. (hm)

Summary

tidy_regression(nomass_fem, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing_c + (1 | population) data_fem binomial 
## AIC:  247.7877 247.7877 
## (Intercept)  coeff:  -0.7311905  Pr(>|t|):  8.382439e-07 *
## wing_c       coeff:  0.4265058   Pr(>|t|):  0.01764923 *

tidy_regression(mass_fem, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ host_c * average_mass + wing_c + (1 | population) data_fem binomial 
## AIC:  230.6699 230.6699 
## (Intercept)          coeff:  1.0497932   Pr(>|t|):  0.1635618
## host_c               coeff:  -1.6597665  Pr(>|t|):  0.01871997 *
## average_mass         coeff:  -22.9687138 Pr(>|t|):  0.02197948 *
## wing_c               coeff:  0.8179845   Pr(>|t|):  0.0001948365 *
## host_c:average_mass  coeff:  21.0529026  Pr(>|t|):  0.0219996 *

tidy_regression(egg_model, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing_c + total_eggs_c + (1 | population) data_fem binomial 
## AIC:  177.1075 177.1075 
## (Intercept)      coeff:  -1.1895793  Pr(>|t|):  5.069879e-09 *
## wing_c           coeff:  0.5576031   Pr(>|t|):  0.01181402 *
## total_eggs_c     coeff:  -0.0110275  Pr(>|t|):  4.517167e-05 *

tidy_regression(morph_fem, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing_c + body_c * wing_c + (1 | population) d binomial 
## AIC:  244.8068 244.8068 
## (Intercept)      coeff:  -0.5721164  Pr(>|t|):  0.00361911 *
## thorax_c         coeff:  -3.9771602  Pr(>|t|):  0.02124539 *
## wing_c           coeff:  -0.1206764  Pr(>|t|):  0.896578
## body_c           coeff:  1.5567268   Pr(>|t|):  0.1033466
## thorax_c:wing_c  coeff:  4.5495185   Pr(>|t|):  0.03910877 *
## wing_c:body_c    coeff:  -1.5315868  Pr(>|t|):  0.02345094 *

tidy_regression(morph_fem2, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population) d binomial 
## AIC:  250.0524 250.0524 
## (Intercept)  coeff:  -0.7145386  Pr(>|t|):  1.121718e-06 *
## wing2body_c  coeff:  16.3816527  Pr(>|t|):  0.06120233 .

Females - Old Method

glmer() A=host, B=mass, C=wing, X=ID

Eggs Modeling

glmer() A=host, B=wing_c, C=mass, D=eggs_b, X=ID

glmer() A=thorax, B=body, C=wing X=trial_type

Cleaning the Data

Experimental Set-Up Effects

## glm flew_b ~ chamber binomial data_fem 
## AIC:  282.7963 
## (Intercept)  coeff:  -1.6094379  Pr(>|t|):  0.01093576 *
## chamberA-2   coeff:  0.8109302   Pr(>|t|):  0.2789959
## chamberA-3   coeff:  1.0498221   Pr(>|t|):  0.1740305
## chamberA-4   coeff:  1.0388931   Pr(>|t|):  0.1498307
## chamberB-1   coeff:  1.2417131   Pr(>|t|):  0.1053886
## chamberB-2   coeff:  0.8556661   Pr(>|t|):  0.2627736
## chamberB-3   coeff:  0.4700036   Pr(>|t|):  0.5317849
## chamberB-4   coeff:  1.3411739   Pr(>|t|):  0.06690121 .

## glm flew_b ~ days_from_start binomial data_fem 
## AIC:  273.9376 
## (Intercept)      coeff:  -0.3131225  Pr(>|t|):  0.2499367
## days_from_start  coeff:  -0.035226   Pr(>|t|):  0.09466742 .

## glm flew_b ~ min_from_IncStart binomial data_fem 
## AIC:  276.4128 
## (Intercept)          coeff:  -0.595335   Pr(>|t|):  0.01168785 *
## min_from_IncStart    coeff:  -0.0005925  Pr(>|t|):  0.5530668

Biological Effects

## glm flew_b ~ mass_c binomial data_fem 
## AIC:  258.5349 
## (Intercept)  coeff:  -0.7752137  Pr(>|t|):  6.296854e-07 *
## mass_c       coeff:  -25.7670872 Pr(>|t|):  0.0004286196 *

## glm flew_b ~ total_eggs_c binomial data_fem 
## AIC:  203.7404 
## (Intercept)      coeff:  -1.1924462  Pr(>|t|):  4.467475e-09 *
## total_eggs_c     coeff:  -0.0113041  Pr(>|t|):  3.560418e-05 *

## glm flew_b ~ eggs_b binomial data_fem 
## AIC:  224.3271 
## (Intercept)  coeff:  0.5959834   Pr(>|t|):  0.01289674 *
## eggs_b       coeff:  -2.2671149  Pr(>|t|):  1.112852e-11 *

Morphology Effects

## glm flew_b ~ beak_c binomial data_fem 
## AIC:  268.3958 
## (Intercept)  coeff:  -0.7370404  Pr(>|t|):  7.970136e-07 *
## beak_c       coeff:  0.5176271   Pr(>|t|):  0.004779927 *

## glm flew_b ~ thorax_c binomial data_fem 
## AIC:  274.8494 
## (Intercept)  coeff:  -0.7139042  Pr(>|t|):  1.007652e-06 *
## thorax_c     coeff:  0.7277155   Pr(>|t|):  0.1679606

## glm flew_b ~ body_c binomial data_fem 
## AIC:  270.8601 
## (Intercept)  coeff:  -0.7302927  Pr(>|t|):  8.444718e-07 *
## body_c       coeff:  0.3564223   Pr(>|t|):  0.01817797 *

## glm flew_b ~ wing_c binomial data_fem 
## AIC:  270.741 
## (Intercept)  coeff:  -0.7314942  Pr(>|t|):  8.319902e-07 *
## wing_c       coeff:  0.4265058   Pr(>|t|):  0.01764921 *

With Mass

data_fem <- data_tested[data_tested$sex=="F",]
data_fem <- data_fem %>%
  filter(!is.na(mass))
data_fem <- center_data(data_fem)

R = data_fem$flew_b
A = data_fem$host_c
B = data_fem$wing_c # used to be sym_dis
C = data_fem$mass_c

data<-data.frame(R, A, B, C)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")

##        [,1]      [,2]      [,3]      [,4]      [,5]       [,6]       [,7]      
## AICs   242.0988  242.1587  243.0503  243.4488  243.7701   244.0985   244.1586  
## models 6         11        12        14        7          10         15        
## probs  0.2065913 0.2004959 0.1283766 0.1051884 0.08957379 0.07601309 0.07376069
## 
## m6   glm(formula = R ~ B + C, family = binomial, data = data)
## m11  glm(formula = R ~ A * B + C, family = binomial, data = data)
## m12  glm(formula = R ~ A * C + B, family = binomial, data = data)
## m14  glm(formula = R ~ A * B + A * C, family = binomial, data = data)
## m7   glm(formula = R ~ A + B + C, family = binomial, data = data)
## m10  glm(formula = R ~ B * C, family = binomial, data = data)
## m15  glm(formula = R ~ A * B + B * C, family = binomial, data = data)

anova(m3, m6, test="Chisq") # Adding B does improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ C
## Model 2: R ~ B + C
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1       210     254.53                          
## 2       209     236.10  1   18.436 1.757e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m3, m5, test="Chisq") # Adding A does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ C
## Model 2: R ~ A + C
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       210     254.53                     
## 2       209     254.49  1 0.040648   0.8402

anova(m6, m7, test="Chisq") # Adding A does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ B + C
## Model 2: R ~ A + B + C
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       209     236.10                     
## 2       208     235.77  1  0.32864   0.5665

anova(m6, m10, test="Chisq") # Adding B*C does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ B + C
## Model 2: R ~ B * C
##   Resid. Df Resid. Dev Df   Deviance Pr(>Chi)
## 1       209      236.1                       
## 2       208      236.1  1 0.00032619   0.9856

## glm flew_b ~ wing_c + mass_c binomial data_fem 
## AIC:  242.0988 
## (Intercept)  coeff:  -0.8614654  Pr(>|t|):  2.942928e-07 *
## wing_c       coeff:  0.8692786   Pr(>|t|):  7.878961e-05 *
## mass_c       coeff:  -38.2356966 Pr(>|t|):  3.658768e-06 *

large negative effect of mass, where the heavier the bug is the less likely the bus will fly/disperse.
positve effect of wing morph where the longer the wing then more likely to disperse.

Consider for females: A=host, B=mass, C=wing, X=ID

R = data_fem$flew_b
A = data_fem$host_c
B = data_fem$mass_c
C = data_fem$wing_c # replaced sym_dist
X = data_fem$ID  # variance is zero if use trial_type | 3 models fail to converge if use ID

data<-data.frame(R, A, B, C, X)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glmer 1 RF + 3-FF.R")

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.29003 (tol = 0.001, component 1)

##        [,1]      [,2]      [,3]       [,4]       [,5]      
## AICs   202.6123  203.7209  206.0113   206.1216   206.2997  
## models 14        17        6          11         18        
## probs  0.3725183 0.2140005 0.06808972 0.06443456 0.05894485
## 
## m14  R ~ A * B + A * C + (1 | X)
## m17  R ~ A * B + A * C + B * C + (1 | X)
## m6   R ~ B + C + (1 | X)
## m11  R ~ A * B + C + (1 | X)
## m18  R ~ A * B * C + (1 | X)

# top model if use ID:
multi_glmer_fem <-glmer(flew_b~host_c * mass_c + host_c * wing_c + (1 | ID), family=binomial, data=data_fem)
tidy_regression(multi_glmer_fem, is_color=output_col)

## glmer flew_b ~ host_c * mass_c + host_c * wing_c + (1 | ID) data_fem binomial 
## AIC:  202.6123 202.6123 
## (Intercept)      coeff:  -14.3528925 Pr(>|t|):  0.0001404174 *
## host_c           coeff:  -3.5218926  Pr(>|t|):  0.1015445
## mass_c           coeff:  -357.1011952    Pr(>|t|):  0.0004263504 *
## wing_c           coeff:  8.6042524   Pr(>|t|):  0.002471981 *
## host_c:mass_c    coeff:  -189.3281934    Pr(>|t|):  0.01570222 *
## host_c:wing_c    coeff:  6.7278037   Pr(>|t|):  0.01681285 *

# top model if use trial_type:
multi_glmer_fem2 <-glmer(flew_b~mass_c + wing_c + (1|trial_type), family=binomial, data=data_fem)

## boundary (singular) fit: see ?isSingular

# if you replace trial_type with ID then you'll see the coefficients are way too huge and at different scales. Why?
tidy_regression(multi_glmer_fem2, is_color=output_col)

## glmer flew_b ~ mass_c + wing_c + (1 | trial_type) data_fem binomial 
## AIC:  244.0988 244.0988 
## (Intercept)  coeff:  -0.8614654  Pr(>|t|):  2.941513e-07 *
## mass_c       coeff:  -38.2356966 Pr(>|t|):  3.640896e-06 *
## wing_c       coeff:  0.8692786   Pr(>|t|):  7.874479e-05 *

Eggs

R = data_fem$flew_b
A = data_fem$host_c
B = data_fem$wing_c
C = data_fem$mass_c
D = data_fem$eggs_b
  
data<-data.frame(R, A, B, C, D)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 4-FF.R")

##        [,1]      [,2]       [,3]      
## AICs   214.6841  214.9507   214.9984  
## models 14        35         34        
## probs  0.0668611 0.05851743 0.05713751
## 
## m14  glm(formula = R ~ B + C + D, family = binomial, data = data)
## m35  glm(formula = R ~ A * C + B + D, family = binomial, data = data)
## m34  glm(formula = R ~ A * B + C + D, family = binomial, data = data)

egg_model2 <-glm(flew_b~wing_c + mass_c + eggs_b, family=binomial, data=data_fem)
tidy_regression(egg_model2, is_color=output_col)

## glm flew_b ~ wing_c + mass_c + eggs_b binomial data_fem 
## AIC:  214.6841 
## (Intercept)  coeff:  0.360869    Pr(>|t|):  0.2011714
## wing_c       coeff:  0.6411567   Pr(>|t|):  0.007192596 *
## mass_c       coeff:  -18.0024358 Pr(>|t|):  0.05131856 .
## eggs_b       coeff:  -1.9676327  Pr(>|t|):  1.88186e-07 *

Strong negative effect if laid eggs that day
strong negative effect of mass
positive effect of wing length

egg_glmer <-glmer(flew_b~wing_c + mass_c + eggs_b + (1|ID), family=binomial, data=data_fem) 
tidy_regression(egg_glmer, is_color=output_col) # model fits better

## glmer flew_b ~ wing_c + mass_c + eggs_b + (1 | ID) data_fem binomial 
## AIC:  172.286 172.286 
## (Intercept)  coeff:  5.8613659   Pr(>|t|):  0.01023977 *
## wing_c       coeff:  1.7946948   Pr(>|t|):  0.2609419
## mass_c       coeff:  -83.2034793 Pr(>|t|):  0.1559541
## eggs_b       coeff:  -15.9949936 Pr(>|t|):  7.729145e-08 *

still a strong negative effect if laid eggs that day but mass and wing length are no longer significant…

glmer() A=host, B=wing_c, C=mass, D=eggs_b, X=ID

R = data_fem$flew_b
A = data_fem$host_c
B = data_fem$wing_c
C = data_fem$mass_c
D = data_fem$eggs_b
X = data_fem$ID
  
data<-data.frame(R, A, B, C, D, X)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-gaussian glmer 1-RF + 4-FF.R") # only 1 model failed to converge

##        [,1]      [,2]      [,3]       [,4]      
## AICs   192.2485  195.0665  195.2866   195.2868  
## models 53        21        86         33        
## probs  0.4532078 0.1107595 0.09921775 0.09920911
## 
## m53  R ~ B * C + C * D + (1 | X)
## m21  R ~ C * D + (1 | X)
## m86  R ~ B * C + B * D + C * D + (1 | X)
## m33  R ~ C * D + B + (1 | X)

anova(m21, m33, test="Chisq") # Adding B improves fits

## refitting model(s) with ML (instead of REML)

## Data: data
## Models:
## m21: R ~ C * D + (1 | X)
## m33: R ~ C * D + B + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m21  6 192.24 212.38 -90.121   180.24                           
## m33  7 187.83 211.33 -86.916   173.83 6.4111      1    0.01134 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m33, m53, test="Chisq") # Adding B*C does not improve fit

## refitting model(s) with ML (instead of REML)

## Data: data
## Models:
## m33: R ~ C * D + B + (1 | X)
## m53: R ~ B * C + C * D + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m33  7 187.83 211.33 -86.916   173.83                         
## m53  8 187.44 214.30 -85.722   171.44 2.3881      1     0.1223

egg_glmer2<-glmer(flew_b~mass_c *eggs_b + wing_c + (1|ID), family=binomial, data=data_fem) 
tidy_regression(egg_glmer2, is_color=output_col) # model fits better

## glmer flew_b ~ mass_c * eggs_b + wing_c + (1 | ID) data_fem binomial 
## AIC:  168.0517 168.0517 
## (Intercept)      coeff:  -1.3549037  Pr(>|t|):  0.6132529
## mass_c           coeff:  -434.4925354    Pr(>|t|):  0.000529944 *
## eggs_b           coeff:  -9.2584415  Pr(>|t|):  0.0002528496 *
## wing_c           coeff:  2.5051483   Pr(>|t|):  0.2992479
## mass_c:eggs_b    coeff:  372.4847126 Pr(>|t|):  0.002918668 *

coefficients are extremely out of scale. * negative effect of mass * negative effect of eggs positive effect of wing positivie effect mass*eggs? doesn’t really make sense. –> collinearity

Morphology

R = data_fem$flew_b
A = data_fem$thorax_c
B = data_fem$body_c
C = data_fem$wing_c 

data<-data.frame(R, A, B, C)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")

##        [,1]      [,2]      [,3]       [,4]       [,5]       [,6]      
## AICs   264.8145  264.9062  265.8203   266.1749   266.5372   266.6123  
## models 16        15        4          5          3          17        
## probs  0.1615492 0.1543053 0.09769975 0.08182416 0.06826939 0.06575111
##        [,7]       [,8]      
## AICs   267.0072   267.0447  
## models 2          10        
## probs  0.05397086 0.05296782
## 
## m16  glm(formula = R ~ A * C + B * C, family = binomial, data = data)
## m15  glm(formula = R ~ A * B + B * C, family = binomial, data = data)
## m4   glm(formula = R ~ A + B, family = binomial, data = data)
## m5   glm(formula = R ~ A + C, family = binomial, data = data)
## m3   glm(formula = R ~ C, family = binomial, data = data)
## m17  glm(formula = R ~ A * B + A * C + B * C, family = binomial, data = data)
## m2   glm(formula = R ~ B, family = binomial, data = data)
## m10  glm(formula = R ~ B * C, family = binomial, data = data)

anova(m13, m16, test="Chisq") # Adding A*C improves fit

## Analysis of Deviance Table
## 
## Model 1: R ~ B * C + A
## Model 2: R ~ A * C + B * C
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1       207     257.16                       
## 2       206     252.81  1   4.3481  0.03705 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

morph_fem2 <- glm(flew_b ~ thorax_c * wing_c + body_c * wing_c, data=data_fem)
tidy_regression(morph_fem2, is_color=output_col)

## glm flew_b ~ thorax_c * wing_c + body_c * wing_c data_fem 
## AIC:  286.161 
## (Intercept)      coeff:  0.3459579   Pr(>|t|):  7.445235e-15 *
## thorax_c         coeff:  -0.3650122  Pr(>|t|):  0.2063331
## wing_c           coeff:  0.0121891   Pr(>|t|):  0.9523256
## body_c           coeff:  0.1402803   Pr(>|t|):  0.4768686
## thorax_c:wing_c  coeff:  0.1125674   Pr(>|t|):  0.6577591
## wing_c:body_c    coeff:  -0.0443685  Pr(>|t|):  0.4358603

no significant effects

morph_fem_glmer <- glmer(flew_b ~ thorax_c * wing_c + body_c * wing_c + (1|ID), data=data_fem, family=binomial)
tidy_regression(morph_fem_glmer, is_color=output_col) #even better fitted model; effects no longer significant and coefficients changed.

## glmer flew_b ~ thorax_c * wing_c + body_c * wing_c + (1 | ID) data_fem binomial 
## AIC:  237.6908 237.6908 
## (Intercept)      coeff:  -6.3578121  Pr(>|t|):  5.347574e-05 *
## thorax_c         coeff:  -7.0364568  Pr(>|t|):  0.374903
## wing_c           coeff:  0.1961325   Pr(>|t|):  0.9568734
## body_c           coeff:  2.4508614   Pr(>|t|):  0.5362512
## thorax_c:wing_c  coeff:  8.012155    Pr(>|t|):  0.4452285
## wing_c:body_c    coeff:  -2.6935872  Pr(>|t|):  0.4002898

Trying wing2body

R = data_fem$flew_b
A = data_fem$thorax_c
B = data_fem$wing2body_c

data<-data.frame(R, A, B)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 2-FF.R")

##        [,1]      [,2]     [,3]      [,4]       [,5]      
## AICs   267.9101  268.2296 269.6979  270.8674   270.9484  
## models 2         4        3         1          5         
## probs  0.3692413 0.314723 0.1510409 0.08416619 0.08082861
## 
## m2   glm(formula = R ~ B, family = binomial, data = data)
## m4   glm(formula = R ~ A * B, family = binomial, data = data)
## m3   glm(formula = R ~ A + B, family = binomial, data = data)
## m1   glm(formula = R ~ A, family = binomial, data = data)
## m0   glm(formula = R ~ 1, family = binomial, data = data)

anova(m0, m2, test="Chisq") # Adding B improves fit

## Analysis of Deviance Table
## 
## Model 1: R ~ 1
## Model 2: R ~ B
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
## 1       211     268.95                       
## 2       210     263.91  1   5.0382  0.02479 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m2, m3, test="Chisq") # Adding A does not improve fit

## Analysis of Deviance Table
## 
## Model 1: R ~ B
## Model 2: R ~ A + B
##   Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1       210     263.91                     
## 2       209     263.70  1   0.2122    0.645

morph_fem3 <- glm(flew_b ~ wing2body_c, data=data_fem, family=binomial) # variance of population and days_from_start_c = 0
tidy_regression(morph_fem3, is_color=output_col)

## glm flew_b ~ wing2body_c binomial data_fem 
## AIC:  267.9101 
## (Intercept)  coeff:  -0.7286364  Pr(>|t|):  1.010644e-06 *
## wing2body_c  coeff:  19.4519307  Pr(>|t|):  0.03183424 *

strong positive effect of wing2body where the larger the ration (the more wing than body), the more likely the female bug disperses

Worth running a glmer() script.

Consider for females morph: A=thorax, B=body, C=wing X=trial_type

R = data_fem$flew_b
A = data_fem$thorax_c
B = data_fem$body_c
C = data_fem$wing_c
X = data_fem$trial_type # you get a different set of top models if you use trial_type or ID, you get the null model if ID and the usual model if trial_type

data<-data.frame(R, A, B, C, X)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glmer 1 RF + 3-FF.R")

##        [,1]      [,2]      [,3]       [,4]       [,5]       [,6]      
## AICs   266.442   266.563   267.499    267.9407   268.2505   268.277   
## models 16        15        4          5          17         3         
## probs  0.1623081 0.1527796 0.09568086 0.07671927 0.06570907 0.06484419
##        [,7]       [,8]      
## AICs   268.7106   268.7662  
## models 2          10        
## probs  0.05220543 0.05077523
## 
## m16  R ~ A * C + B * C + (1 | X)
## m15  R ~ A * B + B * C + (1 | X)
## m4   R ~ A + B + (1 | X)
## m5   R ~ A + C + (1 | X)
## m17  R ~ A * B + A * C + B * C + (1 | X)
## m3   R ~ C + (1 | X)
## m2   R ~ B + (1 | X)
## m10  R ~ B * C + (1 | X)

anova(m0, m3, test="Chisq") # Adding C does not improve fit

## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ C + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m0  2 272.72 279.43 -134.36   268.72                           
## m3  3 268.28 278.35 -131.14   262.28 6.4406      1    0.01115 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m0, m2, test="Chisq") # Adding B does not improve fit

## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
##    Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)  
## m0  2 272.72 279.43 -134.36   268.72                          
## m2  3 268.71 278.78 -131.35   262.71 6.007      1    0.01425 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m0, m1, test="Chisq") # Adding A does not improve fit

## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m0  2 272.72 279.43 -134.36   268.72                         
## m1  3 272.61 282.68 -133.30   266.61 2.1097      1     0.1464

# when use ID and trial type as RFs get the null model as the top model
morph_fem_glmer3 <- glmer(flew_b ~  1 + (1|ID), data=data_fem, family=binomial)
tidy_regression(morph_fem_glmer3 , is_color=output_col)

## glmer flew_b ~ 1 + (1 | ID) data_fem binomial 
## AIC:  230.106 230.106 
## 1    coeff:  -7.3268741  Pr(>|t|):  2.291958e-11 *

# when use only trial type as RF:
morph_fem_glmer4 <- glmer(flew_b ~  thorax_c * wing_c + body_c * wing_c + (1|trial_type), data=data_fem, family=binomial) 
tidy_regression(morph_fem_glmer4 , is_color=output_col)

## glmer flew_b ~ thorax_c * wing_c + body_c * wing_c + (1 | trial_type) data_fem binomial 
## AIC:  266.442 266.442 
## (Intercept)      coeff:  -0.5761341  Pr(>|t|):  0.01623514 *
## thorax_c         coeff:  -3.7064606  Pr(>|t|):  0.03308087 *
## wing_c           coeff:  0.11423 Pr(>|t|):  0.9051479
## body_c           coeff:  1.2869096   Pr(>|t|):  0.1925277
## thorax_c:wing_c  coeff:  4.3295685   Pr(>|t|):  0.05106715 .
## wing_c:body_c    coeff:  -1.471794   Pr(>|t|):  0.03012126 *

Trying wing2body

R = data_fem$flew_b
A = data_fem$thorax_c
B = data_fem$wing2body_c
X = data_fem$trial_type # you get a different set of top models if you use trial_type or ID

data<-data.frame(R, A, B, X)

source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glmer 1-RF + 2-FF.R")

##        [,1]      [,2]     [,3]      [,4]       [,5]     
## AICs   269.7551  270.0063 271.5272  272.608    272.7176 
## models 2         4        3         1          5        
## probs  0.3620922 0.319347 0.1492806 0.08696059 0.0823197
## 
## m2   R ~ B + (1 | X)
## m4   R ~ A * B + (1 | X)
## m3   R ~ A + B + (1 | X)
## m1   R ~ A + (1 | X)
## m0   R ~ 1 + (1 | X)

anova(m0, m2, test="Chisq") # Adding A marginally imprvoes fit

## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## m0  2 272.72 279.43 -134.36   268.72                           
## m2  3 269.75 279.82 -131.88   263.75 4.9626      1     0.0259 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(m0, m1, test="Chisq")

## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m0  2 272.72 279.43 -134.36   268.72                         
## m1  3 272.61 282.68 -133.30   266.61 2.1097      1     0.1464

# best fit model if only use trial type as RF:
morph_fem_glmer5 <- glmer(flew_b ~  wing2body_c + (1|trial_type), data=data_fem, family=binomial) 
tidy_regression(morph_fem_glmer5 , is_color=output_col)

## glmer flew_b ~ wing2body_c + (1 | trial_type) data_fem binomial 
## AIC:  269.7551 269.7551 
## (Intercept)  coeff:  -0.7387311  Pr(>|t|):  4.812048e-05 *
## wing2body_c  coeff:  19.3571342  Pr(>|t|):  0.0329741 *

# best fit model is the null if use both trial type and ID as RFs: 
morph_fem_glmer6 <- glmer(flew_b ~  1 + (1|ID), data=data_fem, family=binomial)
tidy_regression(morph_fem_glmer6, is_color=output_col)

## glmer flew_b ~ 1 + (1 | ID) data_fem binomial 
## AIC:  230.106 230.106 
## 1    coeff:  -7.3268741  Pr(>|t|):  2.291958e-11 *

Summary

nomass_fem2 <-glm(flew_b~wing_c, family=binomial, data=data_fem)
tidy_regression(nomass_fem2, is_color=output_col)

## glm flew_b ~ wing_c binomial data_fem 
## AIC:  266.5372 
## (Intercept)  coeff:  -0.7338294  Pr(>|t|):  9.640784e-07 *
## wing_c       coeff:  0.4423614   Pr(>|t|):  0.01462542 *

tidy_regression(mass_fem2, is_color=output_col)

## glm flew_b ~ wing_c + mass_c binomial data_fem 
## AIC:  242.0988 
## (Intercept)  coeff:  -0.8614654  Pr(>|t|):  2.942928e-07 *
## wing_c       coeff:  0.8692786   Pr(>|t|):  7.878961e-05 *
## mass_c       coeff:  -38.2356966 Pr(>|t|):  3.658768e-06 *

tidy_regression(multi_glmer_fem, is_color=output_col)

## glmer flew_b ~ host_c * mass_c + host_c * wing_c + (1 | ID) data_fem binomial 
## AIC:  202.6123 202.6123 
## (Intercept)      coeff:  -14.3528925 Pr(>|t|):  0.0001404174 *
## host_c           coeff:  -3.5218926  Pr(>|t|):  0.1015445
## mass_c           coeff:  -357.1011952    Pr(>|t|):  0.0004263504 *
## wing_c           coeff:  8.6042524   Pr(>|t|):  0.002471981 *
## host_c:mass_c    coeff:  -189.3281934    Pr(>|t|):  0.01570222 *
## host_c:wing_c    coeff:  6.7278037   Pr(>|t|):  0.01681285 *

tidy_regression(multi_glmer_fem2, is_color=output_col)

## glmer flew_b ~ mass_c + wing_c + (1 | trial_type) data_fem binomial 
## AIC:  244.0988 244.0988 
## (Intercept)  coeff:  -0.8614654  Pr(>|t|):  2.941513e-07 *
## mass_c       coeff:  -38.2356966 Pr(>|t|):  3.640896e-06 *
## wing_c       coeff:  0.8692786   Pr(>|t|):  7.874479e-05 *

tidy_regression(egg_model2, is_color=output_col)

## glm flew_b ~ wing_c + mass_c + eggs_b binomial data_fem 
## AIC:  214.6841 
## (Intercept)  coeff:  0.360869    Pr(>|t|):  0.2011714
## wing_c       coeff:  0.6411567   Pr(>|t|):  0.007192596 *
## mass_c       coeff:  -18.0024358 Pr(>|t|):  0.05131856 .
## eggs_b       coeff:  -1.9676327  Pr(>|t|):  1.88186e-07 *

tidy_regression(egg_glmer, is_color=output_col)

## glmer flew_b ~ wing_c + mass_c + eggs_b + (1 | ID) data_fem binomial 
## AIC:  172.286 172.286 
## (Intercept)  coeff:  5.8613659   Pr(>|t|):  0.01023977 *
## wing_c       coeff:  1.7946948   Pr(>|t|):  0.2609419
## mass_c       coeff:  -83.2034793 Pr(>|t|):  0.1559541
## eggs_b       coeff:  -15.9949936 Pr(>|t|):  7.729145e-08 *

tidy_regression(egg_glmer2, is_color=output_col)

## glmer flew_b ~ mass_c * eggs_b + wing_c + (1 | ID) data_fem binomial 
## AIC:  168.0517 168.0517 
## (Intercept)      coeff:  -1.3549037  Pr(>|t|):  0.6132529
## mass_c           coeff:  -434.4925354    Pr(>|t|):  0.000529944 *
## eggs_b           coeff:  -9.2584415  Pr(>|t|):  0.0002528496 *
## wing_c           coeff:  2.5051483   Pr(>|t|):  0.2992479
## mass_c:eggs_b    coeff:  372.4847126 Pr(>|t|):  0.002918668 *

All the morph models below had an issue between using ID vs. trial type as the RF. When I used trial_type I got reasonable results. When I used ID I always got the null model. Why is this?

tidy_regression(morph_fem2, is_color=output_col)

## glm flew_b ~ thorax_c * wing_c + body_c * wing_c data_fem 
## AIC:  286.161 
## (Intercept)      coeff:  0.3459579   Pr(>|t|):  7.445235e-15 *
## thorax_c         coeff:  -0.3650122  Pr(>|t|):  0.2063331
## wing_c           coeff:  0.0121891   Pr(>|t|):  0.9523256
## body_c           coeff:  0.1402803   Pr(>|t|):  0.4768686
## thorax_c:wing_c  coeff:  0.1125674   Pr(>|t|):  0.6577591
## wing_c:body_c    coeff:  -0.0443685  Pr(>|t|):  0.4358603

tidy_regression(morph_fem_glmer, is_color=output_col)

## glmer flew_b ~ thorax_c * wing_c + body_c * wing_c + (1 | ID) data_fem binomial 
## AIC:  237.6908 237.6908 
## (Intercept)      coeff:  -6.3578121  Pr(>|t|):  5.347574e-05 *
## thorax_c         coeff:  -7.0364568  Pr(>|t|):  0.374903
## wing_c           coeff:  0.1961325   Pr(>|t|):  0.9568734
## body_c           coeff:  2.4508614   Pr(>|t|):  0.5362512
## thorax_c:wing_c  coeff:  8.012155    Pr(>|t|):  0.4452285
## wing_c:body_c    coeff:  -2.6935872  Pr(>|t|):  0.4002898

tidy_regression(morph_fem3, is_color=output_col)

## glm flew_b ~ wing2body_c binomial data_fem 
## AIC:  267.9101 
## (Intercept)  coeff:  -0.7286364  Pr(>|t|):  1.010644e-06 *
## wing2body_c  coeff:  19.4519307  Pr(>|t|):  0.03183424 *

tidy_regression(morph_fem_glmer3 , is_color=output_col)

## glmer flew_b ~ 1 + (1 | ID) data_fem binomial 
## AIC:  230.106 230.106 
## 1    coeff:  -7.3268741  Pr(>|t|):  2.291958e-11 *

tidy_regression(morph_fem_glmer4 , is_color=output_col)

## glmer flew_b ~ thorax_c * wing_c + body_c * wing_c + (1 | trial_type) data_fem binomial 
## AIC:  266.442 266.442 
## (Intercept)      coeff:  -0.5761341  Pr(>|t|):  0.01623514 *
## thorax_c         coeff:  -3.7064606  Pr(>|t|):  0.03308087 *
## wing_c           coeff:  0.11423 Pr(>|t|):  0.9051479
## body_c           coeff:  1.2869096   Pr(>|t|):  0.1925277
## thorax_c:wing_c  coeff:  4.3295685   Pr(>|t|):  0.05106715 .
## wing_c:body_c    coeff:  -1.471794   Pr(>|t|):  0.03012126 *

tidy_regression(morph_fem_glmer5 , is_color=output_col)

## glmer flew_b ~ wing2body_c + (1 | trial_type) data_fem binomial 
## AIC:  269.7551 269.7551 
## (Intercept)  coeff:  -0.7387311  Pr(>|t|):  4.812048e-05 *
## wing2body_c  coeff:  19.3571342  Pr(>|t|):  0.0329741 *

tidy_regression(morph_fem_glmer6, is_color=output_col)

## glmer flew_b ~ 1 + (1 | ID) data_fem binomial 
## AIC:  230.106 230.106 
## 1    coeff:  -7.3268741  Pr(>|t|):  2.291958e-11 *

Males

Cleaning the Data

rm(list=ls())
output_col = FALSE 
source("src/clean_flight_data.R") 
source("src/regression_output.R") 
source("src/center_flight_data.R")
source("get_warnings.R")

data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]

d <- data_tested %>%
   group_by(ID, sex,population, site, host_plant, latitude, longitude, total_eggs, 
            beak, thorax, wing, body, w_morph, morph_notes, tested,
            host_c, sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, 
            beak_c, thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c, wing2body_s) %>%
   summarise_all(funs(list(na.omit(.))))

d$num_flew <- 0
d$num_notflew <- 0
d$average_mass <- 0

for(row in 1:length(d$flew_b)){
  
  n_flew <- sum(d$flew_b[[row]] == 1) # total number of times did fly among trails
  d$num_flew[[row]] <- n_flew 
  
  n_notflew <- sum(d$flew_b[[row]] == 0) # total number of times did not fly among trails
  d$num_notflew[[row]] <- n_notflew
  
  avg_mass <- mean(d$mass[[row]])
  d$average_mass[[row]] <- avg_mass

}

d <- select(d, -filename, -channel_letter, -set_number)

data_male <- d[d$sex=="M",]
data_male <- center_data(data_male, is_not_binded = FALSE) 
data_male

## # A tibble: 214 x 63
## # Groups:   ID, sex, population, site, host_plant, latitude, longitude,
## #   total_eggs, beak, thorax, wing, body, w_morph, morph_notes, tested, host_c,
## #   sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, beak_c,
## #   thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c [214]
##    ID    sex   population site  host_plant latitude longitude total_eggs  beak
##    <fct> <fct> <fct>      <fct> <fct>         <dbl>     <dbl>      <dbl> <dbl>
##  1 1     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  5.5 
##  2 2     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  5.64
##  3 3     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  5.75
##  4 4     M     Plantatio… Areg… C. corind…     25.0     -80.6         NA  6.21
##  5 6     M     Plantatio… Foun… C. corind…     25.0     -80.6         NA  5.76
##  6 9     M     Plantatio… Foun… C. corind…     25.0     -80.6         NA  6.1 
##  7 10    M     Plantatio… Foun… C. corind…     25.0     -80.6         NA  6.09
##  8 11    M     Key Largo  KLMRL C. corind…     25.1     -80.4         NA  5.88
##  9 14    M     Key Largo  KLMRL C. corind…     25.1     -80.4         NA  5.25
## 10 15    M     Key Largo  JP    C. corind…     25.1     -80.4         NA  6.53
## # … with 204 more rows, and 54 more variables: thorax <dbl>, wing <dbl>,
## #   body <dbl>, w_morph <fct>, morph_notes <fct>, tested <fct>, host_c <dbl>,
## #   sex_c <dbl>, w_morph_c <dbl>, lat_c <dbl>, sym_dist <dbl>,
## #   sym_dist_s <dbl>, total_eggs_c <dbl>, beak_c <dbl>, thorax_c <dbl>,
## #   thorax_s <dbl>, body_c <dbl>, wing_c <dbl>, wing2body <dbl>,
## #   wing2body_c <dbl>, wing2body_s <dbl>, trial_type <list>, chamber <list>,
## #   average_speed <list>, total_flight_time <list>, distance <list>,
## #   shortest_flying_bout <list>, longest_flying_bout <list>,
## #   total_duration <list>, max_speed <list>, test_date <list>,
## #   time_start <list>, time_end <list>, flew <list>, flight_type <list>,
## #   mass <list>, EWM <list>, flew_b <list>, eggs_b <list>,
## #   minute_duration <list>, minute_duration_c <list>, min_from_IncStart <dbl>,
## #   min_from_IncStart_c <dbl>, min_from_IncStart_s <dbl>,
## #   days_from_start <list>, days_from_start_c <list>, mass_c <list>,
## #   mass_s <list>, average_mass <dbl>, average_mass_c <dbl>,
## #   average_mass_s <dbl>, trial_type_og <list>, num_flew <dbl>,
## #   num_notflew <dbl>

Experimental, Biological, and Morphological Effects

## glm cbind(num_flew, num_notflew) ~ average_mass binomial data_male 
## AIC:  440.8166 
## (Intercept)      coeff:  0.6948251   Pr(>|t|):  0.2717353
## average_mass     coeff:  1.7230351   Pr(>|t|):  0.9146541

## glm cbind(num_flew, num_notflew) ~ beak_c binomial data_male 
## AIC:  438.5285 
## (Intercept)  coeff:  0.7624726   Pr(>|t|):  1.555005e-12 *
## beak_c       coeff:  0.4003301   Pr(>|t|):  0.1339901

## glm cbind(num_flew, num_notflew) ~ thorax_c binomial data_male 
## AIC:  440.6291 
## (Intercept)  coeff:  0.7618644   Pr(>|t|):  1.376158e-12 *
## thorax_c     coeff:  0.237372    Pr(>|t|):  0.6555153

## glm cbind(num_flew, num_notflew) ~ body_c binomial data_male 
## AIC:  436.6678 
## (Intercept)  coeff:  0.7653895   Pr(>|t|):  1.502199e-12 *
## body_c       coeff:  0.3148292   Pr(>|t|):  0.04156653 *

## glm cbind(num_flew, num_notflew) ~ wing_c binomial data_male 
## AIC:  434.1313 
## (Intercept)  coeff:  0.7693616   Pr(>|t|):  1.439954e-12 *
## wing_c       coeff:  0.4672408   Pr(>|t|):  0.009983293 *

Without Mass

R1 = data_male$num_flew
R2 = data_male$num_notflew
A = data_male$host_c
B = data_male$wing_c
C = data_male$average_mass 
X = data_male$population # can't do trial type anymore

data<-data.frame(R1, R2, A, B, C, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 2-FF.R"))

##        [,1]     [,2]      [,3]     
## AICs   429.694  431.3445  432.2158 
## models 2        3         4        
## probs  0.545637 0.2390621 0.1546389
## 
## m2   cbind(R1, R2) ~ B + (1 | X)
## m3   cbind(R1, R2) ~ A + B + (1 | X)
## m4   cbind(R1, R2) ~ A * B + (1 | X)

length(errors$warnings)

## [1] 0

anova(m2, m3, test="Chisq") # Adding A does not improve fit

## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m3: cbind(R1, R2) ~ A + B + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m2  3 429.69 439.79 -211.85   423.69                         
## m3  4 431.34 444.81 -211.67   423.34 0.3495      1     0.5544

nomass_male <- glmer(cbind(num_flew, num_notflew) ~ wing_c + (1|population), data=data_male, family=binomial)
tidy_regression(nomass_male, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing_c + (1 | population) data_male binomial 
## AIC:  429.694 429.694 
## (Intercept)  coeff:  0.6501987   Pr(>|t|):  0.001137608 *
## wing_c       coeff:  0.5219421   Pr(>|t|):  0.007476693 *

positive effect of wing

With Mass

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))

##        [,1]      [,2]      [,3]      [,4]      [,5]       [,6]      [,7]      
## AICs   425.2395  425.3462  426.4034  426.4553  426.5348   426.8988  427.3874  
## models 10        6         7         13        17         14        16        
## probs  0.1864691 0.1767786 0.1042012 0.1015297 0.09757337 0.0813386 0.06370849
##        [,8]     
## AICs   427.6089 
## models 11       
## probs  0.0570286
## 
## m10  cbind(R1, R2) ~ B * C + (1 | X)
## m6   cbind(R1, R2) ~ B + C + (1 | X)
## m7   cbind(R1, R2) ~ A + B + C + (1 | X)
## m13  cbind(R1, R2) ~ B * C + A + (1 | X)
## m17  cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## m14  cbind(R1, R2) ~ A * B + A * C + (1 | X)
## m16  cbind(R1, R2) ~ A * C + B * C + (1 | X)
## m11  cbind(R1, R2) ~ A * B + C + (1 | X)

length(errors$warnings)

## [1] 0

anova(m6, m10, test="Chisq") # Adding B*C does not improve fit

## Data: data
## Models:
## m6: cbind(R1, R2) ~ B + C + (1 | X)
## m10: cbind(R1, R2) ~ B * C + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m6   4 425.35 438.81 -208.67   417.35                         
## m10  5 425.24 442.07 -207.62   415.24 2.1067      1     0.1467

anova(m6, m7, test="Chisq") # Adding A does not improve fot

## Data: data
## Models:
## m6: cbind(R1, R2) ~ B + C + (1 | X)
## m7: cbind(R1, R2) ~ A + B + C + (1 | X)
##    Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m6  4 425.35 438.81 -208.67   417.35                         
## m7  5 426.40 443.23 -208.20   416.40 0.9428      1     0.3315

mass_male <- glmer(cbind(num_flew, num_notflew) ~ wing_c + average_mass + (1|population), data=data_male, family=binomial)
tidy_regression(mass_male, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing_c + average_mass + (1 | population) data_male binomial 
## AIC:  425.3462 425.3462 
## (Intercept)      coeff:  2.8185733   Pr(>|t|):  0.001597233 *
## wing_c           coeff:  0.9611493   Pr(>|t|):  0.00032735 *
## average_mass     coeff:  -56.8399391 Pr(>|t|):  0.01224168 *

strong positive effect of wing
strong negative effect of average mass

# check for collinearity between mass and wing length
data<-data.frame(data_male$host_c, data_male$wing_c, data_male$average_mass)
colnames(data) <- c("Host Plant", "Wing Length", "Average Mass")
pairs(data)

Morphology

d <- data_male %>%
  filter(!is.na(body))
d$thorax_c <- d$thorax - mean(d$thorax)
d$wing_c <- d$wing - mean(d$wing)
d$body_c <- d$body - mean(d$body)

R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$body_c
C = d$wing_c 
X = d$population

data<-data.frame(R1, R2, A, B, C, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))

##        [,1]      [,2]      [,3]      [,4]      [,5]      [,6]       [,7]      
## AICs   417.7065  418.3341  419.3638  419.4253  419.6429  420.7335   420.7853  
## models 13        10        9         16        15        12         11        
## probs  0.2603437 0.1902239 0.1136751 0.1102345 0.0988691 0.05730978 0.05584524
## 
## m13  cbind(R1, R2) ~ B * C + A + (1 | X)
## m10  cbind(R1, R2) ~ B * C + (1 | X)
## m9   cbind(R1, R2) ~ A * C + (1 | X)
## m16  cbind(R1, R2) ~ A * C + B * C + (1 | X)
## m15  cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m12  cbind(R1, R2) ~ A * C + B + (1 | X)
## m11  cbind(R1, R2) ~ A * B + C + (1 | X)

length(errors$warnings)

## [1] 0

anova(m13, m16, test="Chisq") # Adding B*C does not improve fit

## Data: data
## Models:
## m13: cbind(R1, R2) ~ B * C + A + (1 | X)
## m16: cbind(R1, R2) ~ A * C + B * C + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m13  6 417.71 437.90 -202.85   405.71                         
## m16  7 419.43 442.99 -202.71   405.43 0.2812      1     0.5959

anova(m10, m13, test="Chisq") # Adding A does not improve fit

## Data: data
## Models:
## m10: cbind(R1, R2) ~ B * C + (1 | X)
## m13: cbind(R1, R2) ~ B * C + A + (1 | X)
##     Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## m10  5 418.33 435.16 -204.17   408.33                         
## m13  6 417.71 437.90 -202.85   405.71 2.6276      1      0.105

morph_male <- glmer(cbind(num_flew, num_notflew) ~ body_c * wing_c + thorax_c + (1 | population), data=d, family=binomial)
tidy_regression(morph_male, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ body_c * wing_c + thorax_c + (1 | population) d binomial 
## AIC:  417.7065 417.7065 
## (Intercept)      coeff:  0.9005518   Pr(>|t|):  8.927614e-05 *
## body_c           coeff:  -0.5749452  Pr(>|t|):  0.4359261
## wing_c           coeff:  1.4278926   Pr(>|t|):  0.0544712 .
## thorax_c         coeff:  -1.8911886  Pr(>|t|):  0.106149
## body_c:wing_c    coeff:  -0.6554036  Pr(>|t|):  0.004827235 *

no effect of body
positive effect of wing
no effect of thorax
negative wing:body ratio where the more wing to body the less times a male bug flies.

R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
X = d$population

data<-data.frame(R1, R2, A, B, X)

source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 2-FF.R"))

##        [,1]      [,2]      [,3]     
## AICs   424.3553  425.5079  426.0182 
## models 2         4         3        
## probs  0.4988961 0.2803679 0.2172206
## 
## m2   cbind(R1, R2) ~ B + (1 | X)
## m4   cbind(R1, R2) ~ A * B + (1 | X)
## m3   cbind(R1, R2) ~ A + B + (1 | X)

length(errors$warnings)

## [1] 0

anova(m2, m3, test="Chisq") # Adding B does not improve fit

## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m3: cbind(R1, R2) ~ A + B + (1 | X)
##    Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)
## m2  3 424.36 434.45 -209.18   418.36                        
## m3  4 426.02 439.48 -209.01   418.02 0.337      1     0.5615

morph_male2 <- glmer(cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population), data=d, family=binomial)
tidy_regression(morph_male2, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population) d binomial 
## AIC:  424.3553 424.3553 
## (Intercept)  coeff:  0.6428367   Pr(>|t|):  0.004005262 *
## wing2body_c  coeff:  24.3101404  Pr(>|t|):  0.00056135 *

strong positive effect of wing2body where the more wing2body the more times a male bug flies.

# check for collinearity between mass and morhpology
data<-data.frame(data_male$thorax_c, data_male$wing_c, data_male$body_c, data_male$average_mass)
colnames(data) <- c("Thorax Length", "Wing Length", "Body Length", "Average Mass")
pairs(data)

data_male <- data_tested[data_tested$sex=="M",]
data_male <- center_data(data_male) 

data<-data.frame(data_male$thorax_c, data_male$wing_c, data_male$body_c, data_male$mass_c, data_male$days_from_start_c)
colnames(data) <- c("Thorax Length", "Wing Length", "Body Length", "Average Mass", "Days from Start")
pairs(data)

m <- lm(mass ~ days_from_start, data=data_male)
summary(m)$coefficients

##                     Estimate   Std. Error   t value      Pr(>|t|)
## (Intercept)     0.0365303349 6.852946e-04 53.306032 4.856096e-183
## days_from_start 0.0001970102 5.128275e-05  3.841646  1.422310e-04

plot(mass ~ days_from_start, data=data_male)
abline(m, col="red")
#plot(body ~ days_from_start, data=data_male)
text(mass ~ days_from_start, labels=data_male$ID, data=data_male, cex=0.5, font=2, pos=4) # maybe the smaller males died faster? that's why it looks like.

Summary

tidy_regression(nomass_male, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing_c + (1 | population) data_male binomial 
## AIC:  429.694 429.694 
## (Intercept)  coeff:  0.6501987   Pr(>|t|):  0.001137608 *
## wing_c       coeff:  0.5219421   Pr(>|t|):  0.007476693 *

tidy_regression(mass_male, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing_c + average_mass + (1 | population) data_male binomial 
## AIC:  425.3462 425.3462 
## (Intercept)      coeff:  2.8185733   Pr(>|t|):  0.001597233 *
## wing_c           coeff:  0.9611493   Pr(>|t|):  0.00032735 *
## average_mass     coeff:  -56.8399391 Pr(>|t|):  0.01224168 *

tidy_regression(morph_male, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ body_c * wing_c + thorax_c + (1 | population) d binomial 
## AIC:  417.7065 417.7065 
## (Intercept)      coeff:  0.9005518   Pr(>|t|):  8.927614e-05 *
## body_c           coeff:  -0.5749452  Pr(>|t|):  0.4359261
## wing_c           coeff:  1.4278926   Pr(>|t|):  0.0544712 .
## thorax_c         coeff:  -1.8911886  Pr(>|t|):  0.106149
## body_c:wing_c    coeff:  -0.6554036  Pr(>|t|):  0.004827235 *

tidy_regression(morph_male2, is_color=output_col)

## glmer cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population) d binomial 
## AIC:  424.3553 424.3553 
## (Intercept)  coeff:  0.6428367   Pr(>|t|):  0.004005262 *
## wing2body_c  coeff:  24.3101404  Pr(>|t|):  0.00056135 *

Males - Old Method

Without Mass Modeling + Days From Start

With Mass Modeling + Days From Start