Flight Trials Winter 2020 Dataset was conducted from 2/17/2020 - 3/10/2020. Soapberry bugs were flight tested twice for multiple hours in the flight mill and observed from 8 AM to (5-8 PM) each day. Used multivariate (glm) and mixed effect modeling (glmer) to analyze the flight results.
Testing glmer() and Covariates
Cleaning the Data
rm(list=ls())
output_col = FALSE # Recommend changing this to TRUE if working in Base R or RStudio, and FALSE if generating an html
source("src/clean_flight_data.R") # Script that loads and cleans up the data
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
source("src/regression_output.R") # A script that cleans up regression outputs and prints in color or black and white
source("src/center_flight_data.R")
source("get_warnings.R")
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
d <- data_tested %>%
group_by(ID, sex,population, site, host_plant, latitude, longitude, total_eggs,
beak, thorax, wing, body, w_morph, morph_notes, tested,
host_c, sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c,
beak_c, thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c, wing2body_s) %>%
summarise_all(funs(list(na.omit(.))))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
d$num_flew <- 0
d$num_notflew <- 0
d$average_mass <- 0
for(row in 1:length(d$flew_b)){
n_flew <- sum(d$flew_b[[row]] == 1) # total number of times did fly among trails
d$num_flew[[row]] <- n_flew
n_notflew <- sum(d$flew_b[[row]] == 0) # total number of times did not fly among trails
d$num_notflew[[row]] <- n_notflew
avg_mass <- mean(d$mass[[row]])
d$average_mass[[row]] <- avg_mass
}
d <- select(d, -filename, -channel_letter, -set_number)
d <- center_data(d, is_not_binded = FALSE)
d
## # A tibble: 333 x 63
## # Groups: ID, sex, population, site, host_plant, latitude, longitude,
## # total_eggs, beak, thorax, wing, body, w_morph, morph_notes, tested, host_c,
## # sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, beak_c,
## # thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c [333]
## ID sex population site host_plant latitude longitude total_eggs beak
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 1 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.5
## 2 2 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.64
## 3 3 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.75
## 4 4 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 6.21
## 5 5 F Plantatio… Areg… C. corind… 25.0 -80.6 209 7.47
## 6 6 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 5.76
## 7 7 F Plantatio… Foun… C. corind… 25.0 -80.6 8 8.19
## 8 8 F Plantatio… Foun… C. corind… 25.0 -80.6 92 8.39
## 9 9 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 6.1
## 10 10 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 6.09
## # … with 323 more rows, and 54 more variables: thorax <dbl>, wing <dbl>,
## # body <dbl>, w_morph <fct>, morph_notes <fct>, tested <fct>, host_c <dbl>,
## # sex_c <dbl>, w_morph_c <dbl>, lat_c <dbl>, sym_dist <dbl>,
## # sym_dist_s <dbl>, total_eggs_c <dbl>, beak_c <dbl>, thorax_c <dbl>,
## # thorax_s <dbl>, body_c <dbl>, wing_c <dbl>, wing2body <dbl>,
## # wing2body_c <dbl>, wing2body_s <dbl>, trial_type <list>, chamber <list>,
## # average_speed <list>, total_flight_time <list>, distance <list>,
## # shortest_flying_bout <list>, longest_flying_bout <list>,
## # total_duration <list>, max_speed <list>, test_date <list>,
## # time_start <list>, time_end <list>, flew <list>, flight_type <list>,
## # mass <list>, EWM <list>, flew_b <list>, eggs_b <list>,
## # minute_duration <list>, minute_duration_c <list>, min_from_IncStart <dbl>,
## # min_from_IncStart_c <dbl>, min_from_IncStart_s <dbl>,
## # days_from_start <list>, days_from_start_c <list>, mass_c <list>,
## # mass_s <list>, average_mass <dbl>, average_mass_c <dbl>,
## # average_mass_s <dbl>, trial_type_og <list>, num_flew <dbl>,
## # num_notflew <dbl>
Note
Modeling the dataset as it was before led to lots of converging issues that couldn’t be easily resolved. Now we are using a modified dataset, d, to model the data.
Testing Models and Covariates
# test mixed effect model
model <- glmer(cbind(num_flew, num_notflew)~sex_c*host_c + (1|population), data=d, family=binomial)
## boundary (singular) fit: see ?isSingular
tidy_regression(model, is_color=output_col) ####MLC: updated model, but won't converge with population as a RF
## glmer cbind(num_flew, num_notflew) ~ sex_c * host_c + (1 | population) d binomial
## AIC: 690.6034 690.6034
## (Intercept) coeff: -0.0048142 Pr(>|t|): 0.9617955
## sex_c coeff: -0.6587864 Pr(>|t|): 5.571849e-11 *
## host_c coeff: -0.0673863 Pr(>|t|): 0.5025515
## sex_c:host_c coeff: 0.1645915 Pr(>|t|): 0.1014941
model <- glmer(cbind(num_flew, num_notflew)~sex_c*host_c + (1|site), data=d, family=binomial)
tidy_regression(model, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ sex_c * host_c + (1 | site) d binomial
## AIC: 686.0047 686.0047
## (Intercept) coeff: -0.0092744 Pr(>|t|): 0.9468471
## sex_c coeff: -0.703499 Pr(>|t|): 4.425505e-11 *
## host_c coeff: -0.0460686 Pr(>|t|): 0.7408722
## sex_c:host_c coeff: 0.1855617 Pr(>|t|): 0.07700007 .
getME(model, "lower")
## [1] 0
## AB: What is a random factor here exactly? B/c it seems to matter which we use.
## AB: I tried this and it didn't work but is there a way to cbind() random factors so I can put in chamber, test date, or test time?
Experimental, Biological, and Morphological Effects
## glm cbind(num_flew, num_notflew) ~ average_mass_c binomial d
## AIC: 690.0321
## (Intercept) coeff: 0.2314802 Pr(>|t|): 0.007290253 *
## average_mass_c coeff: -31.1658278 Pr(>|t|): 3.565029e-14 *
## glm cbind(num_flew, num_notflew) ~ total_eggs binomial d
## AIC: 184.4609
## (Intercept) coeff: 0.0580002 Pr(>|t|): 0.8111338
## total_eggs coeff: -0.0117451 Pr(>|t|): 1.730675e-05 *
## glm cbind(num_flew, num_notflew) ~ beak_c binomial d
## AIC: 734.2146
## (Intercept) coeff: 0.23902 Pr(>|t|): 0.003973372 *
## beak_c coeff: -0.4085962 Pr(>|t|): 9.565269e-07 *
## glm cbind(num_flew, num_notflew) ~ thorax_c binomial d
## AIC: 739.1983
## (Intercept) coeff: 0.2385862 Pr(>|t|): 0.003890438 *
## thorax_c coeff: -1.2298779 Pr(>|t|): 1.111691e-05 *
## glm cbind(num_flew, num_notflew) ~ body_c binomial d
## AIC: 750.4139
## (Intercept) coeff: 0.2389879 Pr(>|t|): 0.003520911 *
## body_c coeff: -0.2303836 Pr(>|t|): 0.003002383 *
## glm cbind(num_flew, num_notflew) ~ wing_c binomial d
## AIC: 757.3505
## (Intercept) coeff: 0.2364567 Pr(>|t|): 0.003681042 *
## wing_c coeff: -0.1417783 Pr(>|t|): 0.1546876
R1 = d$num_flew
R2 = d$num_notflew
A = d$host_c
B = d$sex_c
C = d$sym_dist
D = d$average_mass_c
X = d$population # or population get the same top model. Variance is 0 if use pop, not 0 if use site. However, variance is 0 if use site and average_mass (down below).
data<-data.frame(R1, R2, A, B, C, D, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 690.4785 690.6034 691.2458 692.0327 692.4669 693.2453 693.3787
## models 2 8 4 6 11 7 15
## probs 0.2196987 0.2063894 0.1496899 0.1010033 0.08129227 0.0550839 0.05152821
##
## m2 cbind(R1, R2) ~ B + (1 | X)
## m8 cbind(R1, R2) ~ A * B + (1 | X)
## m4 cbind(R1, R2) ~ A + B + (1 | X)
## m6 cbind(R1, R2) ~ B + C + (1 | X)
## m11 cbind(R1, R2) ~ A * B + C + (1 | X)
## m7 cbind(R1, R2) ~ A + B + C + (1 | X)
## m15 cbind(R1, R2) ~ A * B + B * C + (1 | X)
length(errors$warnings)
## [1] 0
anova(m4, m8, test="Chisq") # Adding A*B does not improve fit if use pop as RF | marginally improves fit if use site as RF
## Data: data
## Models:
## m4: cbind(R1, R2) ~ A + B + (1 | X)
## m8: cbind(R1, R2) ~ A * B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m4 4 691.25 706.48 -341.62 683.25
## m8 5 690.60 709.64 -340.30 680.60 2.6424 1 0.104
anova(m2, m4, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m4: cbind(R1, R2) ~ A + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 3 690.48 701.90 -342.24 684.48
## m4 4 691.25 706.48 -341.62 683.25 1.2326 1 0.2669
anova(m2, m6, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m6: cbind(R1, R2) ~ B + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 3 690.48 701.90 -342.24 684.48
## m6 4 692.03 707.27 -342.02 684.03 0.4458 1 0.5043
nomass_model_all <- glmer(cbind(num_flew, num_notflew) ~ sex_c + (1|population), data=d, family=binomial)
tidy_regression(nomass_model_all, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ sex_c + (1 | population) d binomial
## AIC: 690.4785 690.4785
## (Intercept) coeff: -0.0004595 Pr(>|t|): 0.9969752
## sex_c coeff: -0.7362334 Pr(>|t|): 5.062086e-16 *
nomass_model_all <- glmer(cbind(num_flew, num_notflew) ~ sex_c + (1|site), data=d, family=binomial)
tidy_regression(nomass_model_all, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ sex_c + (1 | site) d binomial
## AIC: 685.6479 685.6479
## (Intercept) coeff: 0.006371 Pr(>|t|): 0.9602811
## sex_c coeff: -0.7878123 Pr(>|t|): 6.065057e-16 *
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1-RF + 4-FF.R"))
## [,1] [,2]
## AICs 682.1004 682.5135
## models 63 26
## probs 0.1016377 0.08266881
##
## m63 cbind(R1, R2) ~ A * D + C * D + B + (1 | X)
## m26 cbind(R1, R2) ~ A * D + B + (1 | X)
length(errors$warnings)
## [1] 0
anova(m26, m36, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m26: cbind(R1, R2) ~ A * D + B + (1 | X)
## m36: cbind(R1, R2) ~ A * D + B + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m26 6 682.51 705.36 -335.26 670.51
## m36 7 684.32 710.98 -335.16 670.32 0.1954 1 0.6585
anova(m12, m26, test="Chisq") # Adding A*D does improve fit
## Data: data
## Models:
## m12: cbind(R1, R2) ~ A + B + D + (1 | X)
## m26: cbind(R1, R2) ~ A * D + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m12 5 685.67 704.72 -337.84 675.67
## m26 6 682.51 705.36 -335.26 670.51 5.1609 1 0.0231 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Wanted to check whether sym_dist is making much of an impact overall since m26 was a better fit and you can see it’s far from significant.
tidy_regression(glmer(cbind(num_flew, num_notflew) ~ sym_dist + (1|population), data=d, family=binomial), is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ sym_dist + (1 | population) d binomial
## AIC: 760.9329 760.9329
## (Intercept) coeff: 0.1891344 Pr(>|t|): 0.1982647
## sym_dist coeff: 0.0156086 Pr(>|t|): 0.8614592
mass_model_all <- glmer(cbind(num_flew, num_notflew) ~ host_c * average_mass_c + sex_c + (1|population), data=d, family=binomial)
## boundary (singular) fit: see ?isSingular
tidy_regression(mass_model_all, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ host_c * average_mass_c + sex_c + (1 | population) d binomial
## AIC: 682.5135 682.5135
## (Intercept) coeff: 0.0755669 Pr(>|t|): 0.4658345
## host_c coeff: -0.1293181 Pr(>|t|): 0.1868277
## average_mass_c coeff: -11.585146 Pr(>|t|): 0.1079791
## sex_c coeff: -0.4151303 Pr(>|t|): 0.007602201 *
## host_c:average_mass_c coeff: 10.4421094 Pr(>|t|): 0.02068315 *
Now there’s a conflicting interaction between average mass and host*average_mass…and there’s
How do you graph these models? How would you graph cbind(num_flew, num_notflew)?
plot( cbind(num_flew, num_notflew) ~ cbind(host_c, average_mass), data=d , col=rangi2)
How does num_flew or num_notflew vary with host plant?
coplot(num_flew ~ average_mass_c | host_c, data=d)
coplot(num_notflew~ average_mass_c | host_c, data=d)
Plot the interaction between host*average_mass:
Consistent with the model and graphs: If GRT and had an average mass below the mean of the population, then you flew less times. But if were GRT and had an average mass above the mean of the population, then you flew more times.
Overall mass vs. num_flew trend:
gf_point(num_flew ~ average_mass, data=d) +
geom_smooth(method='lm')
## `geom_smooth()` using formula 'y ~ x'
Then break it between host and average mass:
p1 <- gf_point(num_flew ~ average_mass, col=~host_plant, data=d[d$average_mass_c < 0,], title="(a) avgmass_c <0") +
geom_smooth(method='lm')
p2 <- gf_point(num_flew ~ average_mass, col=~host_plant, data=d[d$average_mass_c > 0,], title="(b) avgmass_c >0") +
geom_smooth(method='lm')
grid.arrange(p1,p2, nrow=2)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
p3 <- gf_point(num_notflew ~ average_mass, col=~host_plant, data=d[d$average_mass_c < 0,], title="(a) avgmass_c <0") +
geom_smooth(method='lm')
p4 <- gf_point(num_notflew ~ average_mass, col=~host_plant, data=d[d$average_mass_c > 0,], title="(b) avgmass_c >0") +
geom_smooth(method='lm')
grid.arrange(p3,p4, nrow=2)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
GRT flyers: Even when they weighed more than the average mass of the population, they flew more times, which breaks the overall trend between number times bug flew vs. average mass which is typically negative.
d <- d %>%
filter(!is.na(body))
d$thorax_c <- d$thorax - mean(d$thorax)
d$wing_c <- d$wing - mean(d$wing)
d$body_c <- d$body - mean(d$body)
R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$body_c
C = d$wing_c
X = d$population
data<-data.frame(R1, R2, A, B, C, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))
## [,1] [,2] [,3] [,4]
## AICs 696.4932 697.7959 698.4702 699.7618
## models 15 16 17 13
## probs 0.4471192 0.2331006 0.166388 0.08722792
##
## m15 cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m16 cbind(R1, R2) ~ A * C + B * C + (1 | X)
## m17 cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## m13 cbind(R1, R2) ~ B * C + A + (1 | X)
length(errors$warnings)
## [1] 1
anova(m15, m17, test="Chisq") # Adding A*C does not improve fit
## Data: data
## Models:
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m17: cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m15 7 696.49 723.15 -341.25 682.49
## m17 8 698.47 728.94 -341.24 682.47 0.023 1 0.8795
anova(m13, m15, test="Chisq") # Adding A*B does improve fit
## Data: data
## Models:
## m13: cbind(R1, R2) ~ B * C + A + (1 | X)
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m13 6 699.76 722.61 -343.88 687.76
## m15 7 696.49 723.15 -341.25 682.49 5.2686 1 0.02171 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
morph_model_all <- glmer(cbind(num_flew, num_notflew) ~ thorax_c * body_c + body_c * wing_c + (1|population), data=d, family=binomial)
tidy_regression(morph_model_all, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ thorax_c * body_c + body_c * wing_c + (1 | population) d binomial
## AIC: 696.4932 696.4932
## (Intercept) coeff: 0.2804294 Pr(>|t|): 0.07044802 .
## thorax_c coeff: -2.549434 Pr(>|t|): 0.003596569 *
## body_c coeff: -1.2548354 Pr(>|t|): 0.007228665 *
## wing_c coeff: 2.2612361 Pr(>|t|): 6.326959e-06 *
## thorax_c:body_c coeff: 1.3630407 Pr(>|t|): 0.02490653 *
## body_c:wing_c coeff: -0.6384384 Pr(>|t|): 0.003529522 *
strong negative effect of thorax where the wider the thorax, the less times the bug flies
strong negative effect of body where the longer the body, the less times a bug flies
strong positive effect of wing where the longer the wing, the more times the bug flies
strong positive effect of thorax*body where the longer the body and wider the thorax, the more times a bug flies (hm seems conflicting to the single effects)
negative effect of of body*wing where the longer the body and wing, the less times the bug flies
R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
X = d$population
data<-data.frame(R1, R2, A, B, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1-RF + 2-FF.R"))
## [,1] [,2]
## AICs 699.1221 700.8564
## models 4 3
## probs 0.7041422 0.2958542
##
## m4 cbind(R1, R2) ~ A * B + (1 | X)
## m3 cbind(R1, R2) ~ A + B + (1 | X)
length(errors$warnings)
## [1] 0
anova(m3, m4, test="Chisq") # Adding A*B improves fit
## Data: data
## Models:
## m3: cbind(R1, R2) ~ A + B + (1 | X)
## m4: cbind(R1, R2) ~ A * B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m3 4 700.86 716.09 -346.43 692.86
## m4 5 699.12 718.16 -344.56 689.12 3.7342 1 0.05331 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
morph_model_all2 <- glmer(cbind(num_flew, num_notflew) ~ wing2body_c * thorax_c + (1|population), data=d, family=binomial)
tidy_regression(morph_model_all2, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing2body_c * thorax_c + (1 | population) d binomial
## AIC: 699.1221 699.1221
## (Intercept) coeff: 0.1644896 Pr(>|t|): 0.2796688
## wing2body_c coeff: 30.4358638 Pr(>|t|): 3.405945e-08 *
## thorax_c coeff: -1.4513019 Pr(>|t|): 8.423364e-07 *
## wing2body_c:thorax_c coeff: -37.3187734 Pr(>|t|): 0.05638751 .
# check for collinearity
covariates <- data.frame(d$thorax, d$wing2body, d$average_mass)
pairs(covariates)
Exponential relationships between mass and thorax. Thorax can only get so big - mass can increase but thorax caps out (top right).
covariates <- data.frame(d$thorax, d$wing, d$body, d$average_mass)
pairs(covariates)
Seems like once morphology caps out so does mass not stretch too far.
R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
C = d$average_mass_c
X = d$population
data<-data.frame(R1, R2, A, B, C, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))
## [,1] [,2] [,3]
## AICs 660.5244 662.4846 665.1787
## models 15 17 11
## probs 0.6458782 0.242385 0.06302092
##
## m15 cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m17 cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## m11 cbind(R1, R2) ~ A * B + C + (1 | X)
length(errors$warnings) # 13 models failed to converge
## [1] 13
anova(m11, m15, test="Chisq") # Adding B*C improves fit
## Data: data
## Models:
## m11: cbind(R1, R2) ~ A * B + C + (1 | X)
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m11 6 665.18 688.03 -326.59 653.18
## m15 7 660.52 687.18 -323.26 646.52 6.6543 1 0.009892 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m15, m17, test="Chisq") # Adding A*C does not improve fit
## Data: data
## Models:
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m17: cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m15 7 660.52 687.18 -323.26 646.52
## m17 8 662.48 692.95 -323.24 646.48 0.0398 1 0.8418
multi_model_all <- glmer(cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + wing2body_c * average_mass + (1|population), data=d, family=binomial)
tidy_regression(multi_model_all, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + wing2body_c * average_mass + (1 | population) d binomial
## AIC: 660.5244 660.5244
## (Intercept) coeff: 2.0903463 Pr(>|t|): 2.37968e-07 *
## thorax_c coeff: 0.742338 Pr(>|t|): 0.1527069
## wing2body_c coeff: -39.7258582 Pr(>|t|): 9.264061e-07 *
## average_mass coeff: -36.1155189 Pr(>|t|): 7.888075e-07 *
## thorax_c:wing2body_c coeff: -108.5821295 Pr(>|t|): 2.376879e-07 *
## wing2body_c:average_mass coeff: 1062.7542387 Pr(>|t|): 7.274993e-27 *
coefficients are huge. Probably a collinearity issue between morph measurements * mass.
R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
C = d$sex_c
X = d$population
data<-data.frame(R1, R2, A, B, C, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 677.5983 678.1801 678.4836 679.3227 680.0466 680.1788 680.4729
## models 11 6 15 14 10 7 17
## probs 0.2571693 0.1922604 0.16519 0.108588 0.07561179 0.07077347 0.06109487
##
## m11 cbind(R1, R2) ~ A * B + C + (1 | X)
## m6 cbind(R1, R2) ~ B + C + (1 | X)
## m15 cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m14 cbind(R1, R2) ~ A * B + A * C + (1 | X)
## m10 cbind(R1, R2) ~ B * C + (1 | X)
## m7 cbind(R1, R2) ~ A + B + C + (1 | X)
## m17 cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
length(errors$warnings) # no models failed
## [1] 0
anova(m11, m15, test="Chisq") # Adding B*C does not improve fit
## Data: data
## Models:
## m11: cbind(R1, R2) ~ A * B + C + (1 | X)
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m11 6 677.60 700.45 -332.80 665.60
## m15 7 678.48 705.14 -332.24 664.48 1.1147 1 0.2911
anova(m11, m14, test="Chisq") # Adding A*C does not improve fit
## Data: data
## Models:
## m11: cbind(R1, R2) ~ A * B + C + (1 | X)
## m14: cbind(R1, R2) ~ A * B + A * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m11 6 677.60 700.45 -332.80 665.60
## m14 7 679.32 705.98 -332.66 665.32 0.2757 1 0.5996
anova(m7, m11, test="Chisq") # Adding A*B does improve fit
## Data: data
## Models:
## m7: cbind(R1, R2) ~ A + B + C + (1 | X)
## m11: cbind(R1, R2) ~ A * B + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m7 5 680.18 699.22 -335.09 670.18
## m11 6 677.60 700.45 -332.80 665.60 4.5805 1 0.03234 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
multi_model_all2 <- glmer(cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + sex_c + (1|population), data=d, family=binomial)
tidy_regression(multi_model_all2, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + sex_c + (1 | population) d binomial
## AIC: 677.5983 677.5983
## (Intercept) coeff: -0.0027673 Pr(>|t|): 0.9846484
## thorax_c coeff: -0.0093963 Pr(>|t|): 0.981983
## wing2body_c coeff: 17.3564108 Pr(>|t|): 0.004548248 *
## sex_c coeff: -0.6549209 Pr(>|t|): 1.908829e-06 *
## thorax_c:wing2body_c coeff: -40.2152503 Pr(>|t|): 0.03633867 *
no effect of thorax
strong positve effect of ratio where the larger the ratio the more times a bug flew
negative effect of sex where if F then less times bug will fly
strong negative effect of thorax*ratio where the larger the ratio and wider the thorax, the less times a bug flew.
# weak positive relationship between thorax vs. wing2body
p1 <- gf_point(thorax ~ wing2body, col=~num_flew, data=d) +
geom_smooth(method='lm')
p2 <- gf_point(thorax ~ wing2body, data=d[d$num_flew == 2,], title="num_flew=2") +
geom_smooth(method='lm')
p3 <- gf_point(thorax ~ wing2body, data=d[d$num_flew == 1,], title="num_flew=1") +
geom_smooth(method='lm')
p4 <- gf_point(thorax ~ wing2body, data=d[d$num_flew == 0,], title="num_flew=0") +
geom_smooth(method='lm')
grid.arrange(p1,p2,p3,p4, ncol=2)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
As wing2body and thorax increases, num_flew decreases too, but its really the thorax shrinking that seems to be driving this.
gf_point(num_flew ~ thorax, data=d) +
geom_smooth(method='lm')
## `geom_smooth()` using formula 'y ~ x'
gf_point(num_flew ~ wing2body, data=d) +
geom_smooth(method='lm')
## `geom_smooth()` using formula 'y ~ x'
R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
C = d$sex_c
D = d$average_mass
X = d$population
data<-data.frame(R1, R2, A, B, C, D, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1-RF + 4-FF.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 658.3051 658.3448 659.4618 659.4827 659.5901 660.038 660.3253
## models 106 74 58 112 94 100 97
## probs 0.1493435 0.1464084 0.083755 0.08288477 0.07854843 0.06278998 0.05438723
##
## m106 cbind(R1, R2) ~ A * B + A * C + A * D + B * C + B * D + (1 |
## X)
## m74 cbind(R1, R2) ~ A * B + B * C + B * D + (1 | X)
## m58 cbind(R1, R2) ~ A * B + B * D + C + (1 | X)
## m112 cbind(R1, R2) ~ A * B + A * C + A * D + B * C + B * D + C * D +
## (1 | X)
## m94 cbind(R1, R2) ~ A * B + A * C + B * C + B * D + (1 | X)
## m100 cbind(R1, R2) ~ A * B + B * C + B * D + C * D + (1 | X)
## m97 cbind(R1, R2) ~ A * B + A * D + B * C + B * D + (1 | X)
length(errors$warnings) # 68 models failed + omodels are WAY too complicated. All the top models have multiple interactions b/c I'm guessing, they're correlated in some ways.
## [1] 61
anova(m94, m106, test="Chisq") # Adding A*D marginally improves fit
## Data: data
## Models:
## m94: cbind(R1, R2) ~ A * B + A * C + B * C + B * D + (1 | X)
## m106: cbind(R1, R2) ~ A * B + A * C + A * D + B * C + B * D + (1 |
## m106: X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m94 10 659.59 697.67 -319.80 639.59
## m106 11 658.31 700.19 -318.15 636.31 3.2851 1 0.06991 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m97, m106, test="Chisq") # Adding A*C improves fit
## Data: data
## Models:
## m97: cbind(R1, R2) ~ A * B + A * D + B * C + B * D + (1 | X)
## m106: cbind(R1, R2) ~ A * B + A * C + A * D + B * C + B * D + (1 |
## m106: X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m97 10 660.33 698.41 -320.16 640.33
## m106 11 658.31 700.19 -318.15 636.31 4.0202 1 0.04496 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#multi_model <- glmer(cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + thorax_c * sex_c + thorax_c * average_mass + wing2body_c * sex_c + wing2body_c * average_mass + (1|population), data=d, family=binomial) # model failed to converge
#tidy_regression(multi_model, is_color=output_col)
tidy_regression(nomass_model_all, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ sex_c + (1 | site) d binomial
## AIC: 685.6479 685.6479
## (Intercept) coeff: 0.006371 Pr(>|t|): 0.9602811
## sex_c coeff: -0.7878123 Pr(>|t|): 6.065057e-16 *
tidy_regression(mass_model_all, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ host_c * average_mass_c + sex_c + (1 | population) d binomial
## AIC: 682.5135 682.5135
## (Intercept) coeff: 0.0755669 Pr(>|t|): 0.4658345
## host_c coeff: -0.1293181 Pr(>|t|): 0.1868277
## average_mass_c coeff: -11.585146 Pr(>|t|): 0.1079791
## sex_c coeff: -0.4151303 Pr(>|t|): 0.007602201 *
## host_c:average_mass_c coeff: 10.4421094 Pr(>|t|): 0.02068315 *
Why this host*average_mass interaction? GRT flyers: Even when they weighed more than the average mass of the population, they flew more times, which breaks the overall trend between number times bug flew vs. average mass which is typically negative. (Refer to plots in the “All Trials” tab)
tidy_regression(morph_model_all, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ thorax_c * body_c + body_c * wing_c + (1 | population) d binomial
## AIC: 696.4932 696.4932
## (Intercept) coeff: 0.2804294 Pr(>|t|): 0.07044802 .
## thorax_c coeff: -2.549434 Pr(>|t|): 0.003596569 *
## body_c coeff: -1.2548354 Pr(>|t|): 0.007228665 *
## wing_c coeff: 2.2612361 Pr(>|t|): 6.326959e-06 *
## thorax_c:body_c coeff: 1.3630407 Pr(>|t|): 0.02490653 *
## body_c:wing_c coeff: -0.6384384 Pr(>|t|): 0.003529522 *
tidy_regression(morph_model_all2, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing2body_c * thorax_c + (1 | population) d binomial
## AIC: 699.1221 699.1221
## (Intercept) coeff: 0.1644896 Pr(>|t|): 0.2796688
## wing2body_c coeff: 30.4358638 Pr(>|t|): 3.405945e-08 *
## thorax_c coeff: -1.4513019 Pr(>|t|): 8.423364e-07 *
## wing2body_c:thorax_c coeff: -37.3187734 Pr(>|t|): 0.05638751 .
Why this wing2body*thorax interaction? As wing2body and thorax increases, num_flew decreases too, but its really the thorax shrinking that seems to be driving this.
tidy_regression(multi_model_all, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + wing2body_c * average_mass + (1 | population) d binomial
## AIC: 660.5244 660.5244
## (Intercept) coeff: 2.0903463 Pr(>|t|): 2.37968e-07 *
## thorax_c coeff: 0.742338 Pr(>|t|): 0.1527069
## wing2body_c coeff: -39.7258582 Pr(>|t|): 9.264061e-07 *
## average_mass coeff: -36.1155189 Pr(>|t|): 7.888075e-07 *
## thorax_c:wing2body_c coeff: -108.5821295 Pr(>|t|): 2.376879e-07 *
## wing2body_c:average_mass coeff: 1062.7542387 Pr(>|t|): 7.274993e-27 *
tidy_regression(multi_model_all2, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing2body_c + sex_c + (1 | population) d binomial
## AIC: 677.5983 677.5983
## (Intercept) coeff: -0.0027673 Pr(>|t|): 0.9846484
## thorax_c coeff: -0.0093963 Pr(>|t|): 0.981983
## wing2body_c coeff: 17.3564108 Pr(>|t|): 0.004548248 *
## sex_c coeff: -0.6549209 Pr(>|t|): 1.908829e-06 *
## thorax_c:wing2body_c coeff: -40.2152503 Pr(>|t|): 0.03633867 *
Huge coefficients! I’m sure this is all due to collinearity. Thoughts?
nomass_model_all_old<-glmer(flew_b~sex_c*host_c + (1|ID) + (1|trial_type), family=binomial, data=data_tested)
tidy_regression(nomass_model_all_old, is_color=output_col)
## glmer flew_b ~ sex_c * host_c + (1 | ID) + (1 | trial_type) data_tested binomial
## AIC: 714.6409 714.6409
## (Intercept) coeff: -0.2909258 Pr(>|t|): 0.5897965
## sex_c coeff: -1.8480135 Pr(>|t|): 0.0005216503 *
## host_c coeff: -0.1968491 Pr(>|t|): 0.4865478
## sex_c:host_c coeff: 0.4843427 Pr(>|t|): 0.1109208
It’s possible that females from GRT are more likley to fly because they are colonizers.
# There isn't a mass_model_all_old because too many models failed to converge
data_tested <- data_tested %>%
filter(!is.na(body))
data_tested <- center_data(data_tested)
morph_model_all_old<-glmer(flew_b~ thorax_c * body_c + body_c * wing_c + (1|ID), family=binomial, data=data_tested)
tidy_regression(morph_model_all_old, is_color=output_col) # same morph model as the morph_model_all
## glmer flew_b ~ thorax_c * body_c + body_c * wing_c + (1 | ID) data_tested binomial
## AIC: 739.881 739.881
## (Intercept) coeff: 0.657076 Pr(>|t|): 0.004401535 *
## thorax_c coeff: -4.203661 Pr(>|t|): 0.01688148 *
## body_c coeff: -2.2457843 Pr(>|t|): 0.02205122 *
## wing_c coeff: 3.8423882 Pr(>|t|): 0.0003701166 *
## thorax_c:body_c coeff: 2.2419667 Pr(>|t|): 0.05387419 .
## body_c:wing_c coeff: -1.1049738 Pr(>|t|): 0.009640642 *
morph_model_all_old2<-glmer(flew_b~ thorax_c + wing2body_c + (1|trial_type) + (1|ID), family=binomial, data=data_tested)
tidy_regression(morph_model_all_old2, is_color=output_col)
## glmer flew_b ~ thorax_c + wing2body_c + (1 | trial_type) + (1 | ID) data_tested binomial
## AIC: 731.1757 731.1757
## (Intercept) coeff: 0.3960016 Pr(>|t|): 0.3805652
## thorax_c coeff: -3.3415059 Pr(>|t|): 0.0001888262 *
## wing2body_c coeff: 69.1783151 Pr(>|t|): 1.838582e-05 *
This morph model is a better fit model than the morph_model_all_old model, but HUGE coefficient for wing2body_c. Why?
Some graphs I generated illustrated how changes in mass or egg-laying lead to changes in flight response, speed, and distance. Below, I run analyses on changes in flight response due to changes in mass or egg-laying that will help interpret and describe those graphs. Other graphs such as speed and distance changes will be found in the speed and distance scripts.
To perfom these analyses, a new variable will be created called “flew_diff” which calculates the flight response differences between T1 and T2. If flew_diff = -1, then the bug flew in T1 but not T2. If flew_diff = 0, then the bug either did not fly in either or flew in both T1 and T2 (no flight response). If flew_diff = 1, then the bug flew in T2 but not T1. Since this response variable is no longer binomial, I will need to use multicategorical logit models (refer to Chapter 6 of Introduction to Categorical Data Analysis by Alan Agresti). Below I’ve written out notes on performing these analyses.
What we are interested in is a multicategorical, nomial response variable. When the response variable is nominal, there is no natural order among the response variable categories (unordered categories). When the response variable is multicategorical, its multicategory models assume that the counts in the categories of \(Y\) have a multinomial distribution.
Let J = number of categories for \(Y\).
\(\pi_1,...\pi_J\) = the response probabilities where \(\sum_j \pi_j = 1\).
With \(n\) independent observations, the probability distribution for the number of outcomes of the J types is the multinomial. It specifices the probability for each possible way the \(n\) observations can fall in the J categories. Multicategory logit models simultaneously use all pairs of categories by specifying the odds of outocme in one category instead of another.
Logit models for nomial response variables pair each category with a baseline category. The choice of the baseline category is arbitrary. When the last category (J) is the baseline, the baseline-category logits are
\(\log (\frac{\pi_j}{\pi_J}), j = 1, ..., J - 1\).
Given that the response falls in category j or category J, this is the log odds that the response is j. For J = 3, for instance, the model uses \(\log(\pi_1/\pi_3)\) and \(\log(\pi_2/\pi_3)\). The baseline-category logit model with a predictor x is
\(\log \frac{\pi_j}{\pi_J} = \alpha_J + \beta_j x, j = 1, ..., J - 1\)
The model has J - 1 equations, with separate parameters for each. The effects vary according to the category paried with the baseline. When J = 2, this model simplifies to a single equaiton for \(\log(\pi_1/\pi_2) = logit(\pi_1)\), resulting in ordinary logistic regression for binary responses.
So how do these pair of categories determine equations for all other pairs of categories? Here is an arbitrary pair of categories a and b that follows the general equation above,
\(\log (\frac{\pi_a}{\pi_b}) = \log ( \frac{\pi_a/pi_J}{\pi_b/pi_J}) = \log (\frac{\pi_a}{\pi_J}) - \log (\frac{\pi_b}{\pi_J})\)
\(= (\alpha_a + \beta_a x) - (\alpha_b + \beta_b x)\)
\(= (\alpha_a - \alpha_b) + (\beta_a - \beta_b) x\)
So, the equaiton for categories a and b has the form \(\alpha + \beta x\) with intercept parameter \(\alpha = (\alpha_a - \alpha_b)\) and with slope parameter \(\beta = (\beta_a - \beta_b)\).
Let’s apply it to our data now:
rm(list=ls())
output_col = FALSE # Recommend changing this to TRUE if working in Base R or RStudio, and FALSE if generating an html
source("src/clean_flight_data.R") # Script that loads and cleans up the data
source("src/regression_output.R") # A script that cleans up regression outputs and prints in color or black and white
source("src/center_flight_data.R")
source("get_warnings.R")
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
d <- data_tested %>%
group_by(ID, sex,population, site, host_plant, latitude, longitude, total_eggs,
beak, thorax, wing, body, w_morph, morph_notes, tested,
host_c, sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c,
beak_c, thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c, wing2body_s) %>%
summarise_all(funs(list(na.omit(.))))
d$num_flew <- 0
d$num_notflew <- 0
d$average_mass <- 0
d$mass_diff <- 0
d$flew_diff <- 0
d$dist_diff <- 0
d$speed_diff <- 0
for(row in 1:length(d$flew_b)){
n_flew <- sum(d$flew_b[[row]] == 1) # total number of times did fly among trails
d$num_flew[[row]] <- n_flew
n_notflew <- sum(d$flew_b[[row]] == 0) # total number of times did not fly among trails
d$num_notflew[[row]] <- n_notflew
avg_mass <- mean(d$mass[[row]])
d$average_mass[[row]] <- avg_mass
# mass, flight, distance, and speed changes between T1 and T2
d$mass_diff[[row]] <- d$mass[[row]][2] - d$mass[[row]][1] # T2 - T1
d$flew_diff[[row]] <- d$flew_b[[row]][2] - d$flew_b[[row]][1] # T2 - T1
d$dist_diff[[row]] <- d$distance[[row]][2] - d$distance[[row]][1] # T2 - T1
d$speed_diff[[row]] <- d$average_speed[[row]][2] - d$average_speed[[row]][1] # T2 - T1
d$egg_diff[[row]] <- d$eggs_b[[row]][2] - d$eggs_b[[row]][1] # T2 - T1
}
## Warning: Unknown or uninitialised column: `egg_diff`.
d <- select(d, -filename, -channel_letter, -set_number)
d # NA's generated are good because that means it's accounted only for bugs that flew in both trials
## # A tibble: 333 x 68
## # Groups: ID, sex, population, site, host_plant, latitude, longitude,
## # total_eggs, beak, thorax, wing, body, w_morph, morph_notes, tested, host_c,
## # sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, beak_c,
## # thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c [333]
## ID sex population site host_plant latitude longitude total_eggs beak
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 1 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.5
## 2 2 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.64
## 3 3 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.75
## 4 4 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 6.21
## 5 5 F Plantatio… Areg… C. corind… 25.0 -80.6 209 7.47
## 6 6 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 5.76
## 7 7 F Plantatio… Foun… C. corind… 25.0 -80.6 8 8.19
## 8 8 F Plantatio… Foun… C. corind… 25.0 -80.6 92 8.39
## 9 9 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 6.1
## 10 10 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 6.09
## # … with 323 more rows, and 59 more variables: thorax <dbl>, wing <dbl>,
## # body <dbl>, w_morph <fct>, morph_notes <fct>, tested <fct>, host_c <dbl>,
## # sex_c <dbl>, w_morph_c <dbl>, lat_c <dbl>, sym_dist <dbl>,
## # sym_dist_s <dbl>, total_eggs_c <dbl>, beak_c <dbl>, thorax_c <dbl>,
## # thorax_s <dbl>, body_c <dbl>, wing_c <dbl>, wing2body <dbl>,
## # wing2body_c <dbl>, wing2body_s <dbl>, trial_type <list>, chamber <list>,
## # average_speed <list>, total_flight_time <list>, distance <list>,
## # shortest_flying_bout <list>, longest_flying_bout <list>,
## # total_duration <list>, max_speed <list>, test_date <list>,
## # time_start <list>, time_end <list>, flew <list>, flight_type <list>,
## # mass <list>, EWM <list>, flew_b <list>, eggs_b <list>,
## # minute_duration <list>, minute_duration_c <list>, min_from_IncStart <list>,
## # min_from_IncStart_c <list>, min_from_IncStart_s <list>,
## # days_from_start <list>, days_from_start_c <list>, mass_c <list>,
## # mass_s <list>, average_mass <dbl>, average_mass_c <list>,
## # average_mass_s <list>, trial_type_og <list>, num_flew <dbl>,
## # num_notflew <dbl>, mass_diff <dbl>, flew_diff <dbl>, dist_diff <dbl>,
## # speed_diff <dbl>, egg_diff <dbl>
Below I used the multinom function from the nnet package to estimate a multinomial logistic regression model. There are other functions in other R packages capable of multinomial regression. I chose the multinom function because it does not require the data to be reshaped (as the mlogit package does) and to mirror the example code found in Hilbe’s Logistic Regression Models.
First, I need to choose the level of our outcome that we wish to use as our baseline and specify this in the relevel function. Then, I ran our model using multinom. The multinom package does not include p-value calculation for the regression coefficients, so I calculate p-values using Wald tests (here z-tests).
df <- d %>%
filter(!is.na(mass_diff), !is.na(flew_diff))
df <- df[with(df, order(mass_diff)),]
df$flew_diff <- relevel(as.factor(df$flew_diff), ref = "0")
Null model:
null <- multinom(flew_diff ~ 1, data = df)
## # weights: 6 (2 variable)
## initial value 305.414216
## iter 10 value 174.928010
## iter 10 value 174.928009
## iter 10 value 174.928009
## final value 174.928009
## converged
Mass_diff model:
model <- multinom(flew_diff ~ mass_diff, data = df)
## # weights: 9 (4 variable)
## initial value 305.414216
## iter 10 value 166.730466
## final value 166.154287
## converged
s <- summary(model) # multinom doesn't compute pvalues so need to do it by hand.
z <- s$coefficients/s$standard.errors # calculate p-values using Wald tests (here z-tests)
wald <- z^2
p <- (1 - pnorm(abs(z), 0, 1)) * 2
table <- cbind(s$coefficients, c(s$edf, s$edf), s$standard.errors[,2], z[,2], wald[,2], p[,2])
colnames(table) <- c("(Intercept)"," Estimate","DF", "Std. Err.", "z", "wald", "P > |z|")
cat("\n")
cat("AIC: ", s$AIC, "\n")
## AIC: 340.3086
table
## (Intercept) Estimate DF Std. Err. z wald P > |z|
## -1 -1.699907 52.84829 4 13.64735 3.8724203 14.99563911 0.0001077599
## 1 -3.066996 -4.57556 4 27.85556 -0.1642602 0.02698141 0.8695263283
anova(null, model, test="Chisq") # adding mass_diff improves fit
## Likelihood ratio tests of Multinomial Models
##
## Response: flew_diff
## Model Resid. df Resid. Dev Test Df LR stat. Pr(Chi)
## 1 1 554 349.8560
## 2 mass_diff 552 332.3086 1 vs 2 2 17.54744 0.0001547465
The ML prediction equations are:
\(\log(\hat{\pi_{-1}}/\hat{\pi_0}) = -1.70 + 50.85 x\) and
\(\log(\hat{\pi_1}/\hat{\pi_0}) = -3.07 - 4.58 x\)
The estimated log odds that the response is -1 (Flew in T1, not T2) rather than 1 (flew in T2, not T1) equals:
\(\log(\hat{\pi_{-1}}/\hat{\pi_1}) = (-1.70 - (-3.07)) + (50.85 - (- 4.58)) x = 1.37 + 55.43 x\) where x = mass_diff
So, the more positive the mass difference (positive (+) mass means gained mass from T1 to T2, and negative (-) mass means lost mass), the more likely to select flew_diff = -1, meaning that the bug flew in T1 but not T2. While the smaller/more negative the mass difference, the more likely to select flew_diff = 1, meaning that the bug flew in T2 but not T1.
(50.85 - (- 4.58))
## [1] 55.43
For bugs of mass_diff x + 1/50 (0.02) g, the estimated odds that flew_diff = -1 rather than flew_diff = 1 equals
times the estimated odds at mass_diff x (g).
exp(coef(model)/50)
## (Intercept) mass_diff
## -1 0.9665733 2.8776264
## 1 0.9405035 0.9125511
The relative risk ratio for a 1/50-unit increase in the variable mass_diff is 2.90 for flew_diff = -1 (T1 flew only) rather than flew_diff = 1 (T2 flew only).
Predicted probabilities
You can also use predicted probabilities to help you understand the model. The multicategory logit model has an alternative expression in terms of the response probabilities. This is
\(\pi_j = \frac{e^{\alpha_j + \beta_j x}}{\sum_he^{\alpha_h + \beta_h x}}, j=1,...,J\)
The denominator is the same for each probability, and the numerators for various j sum to the denominator where \(\sum_j \pi_j = 1\). The parameters (\(\alpha and \beta\)) equal zero for whichever category is the baseline in the logit expressions. So these would be the equations for the model above:
\(\hat{\pi_-1} = \frac{e^{-1.70 + 50.85 x}}{1 + e^{-1.70 + 50.85x} + e^{-3.07 - 4.58 x}}, j=1,...,J\)
\(\hat{\pi_1} = \frac{e^{-3.07 - 4.58 x}}{1 + e^{-1.70 + 50.85x} + e^{-3.07 - 4.58 x}}, j=1,...,J\)
\(\hat{\pi_0} = \frac{e^{0 + 0 x} = 1}{1 + e^{-1.70 + 50.85x} + e^{-3.07 - 4.58x}}, j=1,...,J\)
x = mass_diff which can give you the estimated probabilities.
You can calculate these predicted probabilities for each of our outcome levels using the fitted function. You can start by generating the predicted probabilities for the observations in our dataset and viewing the first few rows. Then you can plot them.
head(pp <- fitted(model))
## 0 -1 1
## 1 0.9309721 0.01577063 0.05325723
## 2 0.9268548 0.02155929 0.05158587
## 3 0.9259895 0.02270809 0.05130243
## 4 0.9219192 0.02793021 0.05015061
## 5 0.9194762 0.03096174 0.04956208
## 6 0.9167257 0.03431055 0.04896370
plot(df$mass_diff, pp[,1], ylim=c(0,1), col="red", type="l", ylab="Predicted Probability", xlab="Changes in Mass From T1 to T2 (g)") # no flight_diff
points(df$mass_diff, pp[,2], col="blue", type="l") # flew in T1 but not T2
points(df$mass_diff, pp[,3], col="green", type="l") # flew in T2 but not T1
text(0.02,0.8, labels="No Flight Diff", col="red")
text(0.0,0.35, labels="Flew in T1 but not T2", col="blue")
text(0.02,0.1, labels="Flew in T2 but not T1", col="green")
abline(v=0.035, lty=2)
abline(v=-0.027, lty=2)
Summary: Those who gain mass (at least more than 0.035 g) are more likely to fly in T1 but not T2 and less likely to have no change in flight response. Those who loose mass are more likely to fly in T2 but not T1 (but need to loose significant mass - more than 0.03 (g)).
Now let’s compare some models:
data <- data.frame(R = df$flew_diff,
A = df$mass_diff,
B = df$host_c,
C = df$sex_c)
source("src/compare_models.R")
model_comparisonsAIC("src/generic multinomial models- multinom 1RF + 3 FF.R")
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 333.1306 333.6353 334.6408 335.332 335.5504 335.8177
## models 5 11 13 16 7 14
## probs 0.2715071 0.2109556 0.1275994 0.09031437 0.08097221 0.07083936
## [,7]
## AICs 335.9544
## models 9
## probs 0.06615921
##
## m5 multinom(formula = R ~ A + C, data = data, trace = FALSE)
## m11 multinom(formula = R ~ A * B + C, data = data, trace = FALSE)
## m13 multinom(formula = R ~ B * C + A, data = data, trace = FALSE)
## m16 multinom(formula = R ~ B * C + A * B, data = data, trace = FALSE)
## m7 multinom(formula = R ~ A + B + C, data = data, trace = FALSE)
## m14 multinom(formula = R ~ A * B + A * C, data = data, trace = FALSE)
## m9 multinom(formula = R ~ A * C, data = data, trace = FALSE)
anova(m5, m7, test="Chisq") # Adding B does not improve fit
## Likelihood ratio tests of Multinomial Models
##
## Response: R
## Model Resid. df Resid. Dev Test Df LR stat. Pr(Chi)
## 1 A + C 550 321.1306
## 2 A + B + C 548 319.5504 1 vs 2 2 1.580235 0.4537914
anova(m5, m9, test="Chisq") # Adding A*C does not improve fit
## Likelihood ratio tests of Multinomial Models
##
## Response: R
## Model Resid. df Resid. Dev Test Df LR stat. Pr(Chi)
## 1 A + C 550 321.1306
## 2 A * C 548 319.9544 1 vs 2 2 1.176152 0.5553949
delta_mass_model <- multinom(flew_diff ~ mass_diff + sex_c, data = df)
## # weights: 12 (6 variable)
## initial value 305.414216
## iter 10 value 167.318267
## iter 20 value 161.019802
## iter 30 value 160.852946
## iter 40 value 160.741183
## iter 50 value 160.658113
## iter 60 value 160.603172
## iter 70 value 160.587839
## iter 80 value 160.572910
## iter 90 value 160.565452
## final value 160.565295
## converged
s <- summary(delta_mass_model) # multinom doesn't compute pvalues so need to do it by hand.
z <- s$coefficients/s$standard.errors # calculate p-values using Wald tests (here z-tests)
wald <- z^2
p <- (1 - pnorm(abs(z), 0, 1)) * 2
table <- cbind(s$coefficients, c(s$edf, s$edf), s$standard.errors[,2:3], z[,2:3], wald[,2:3], p[,2:3])
colnames(table) <- c("(Intercept)","coeff mass_diff", "coeff sex_c","DF", "MD SE", "sex SE", "MD z", "sex z",
"MD wald","sex wald","MD P > |z|", "sex P > |z|")
cat("\n", "AIC: ", s$AIC, "\n", "MD = mass_diff", "\n")
##
## AIC: 333.1306
## MD = mass_diff
table
## (Intercept) coeff mass_diff coeff sex_c DF MD SE sex SE MD z
## -1 -1.863449 61.25330 -0.2881294 6 15.99454 0.1991477 3.8296384
## 1 -8.900205 -55.86612 -6.3472957 6 67.78557 89.6299371 -0.8241596
## sex z MD wald sex wald MD P > |z| sex P > |z|
## -1 -1.44681204 14.666130 2.093265087 0.0001283317 0.1479496
## 1 -0.07081669 0.679239 0.005015004 0.4098489056 0.9435436
The general ML prediction equaiton is:
\(\log(\hat{\pi_{j}}/\hat{\pi_0}) =\alpha_{j} + \beta_{-1}^{\delta mass} x_1 + \beta_{j}^{sex} x_2\) where j = -1 or 1
The individual ML prediction equations are:
\(\log(\hat{\pi_{-1}}/\hat{\pi_0}) = -1.86 + 61.25 (\delta mass) - 0.29 (sex)\) and
\(\log(\hat{\pi_1}/\hat{\pi_0}) = -8.90 - 55.87 (\delta mass) - 6.35 (sex)\)
The estimated log odds that the response is -1 (Flew in T1, not T2) rather than 1 (flew in T2, not T1) equals:
\(\log(\hat{\pi_{-1}}/\hat{\pi_1}) = (-1.86 - (-8.90)) + (61.25 - (- 55.87)) \delta mass + (-0.29 - (-6.35)) sex\)
\(\log(\hat{\pi_{-1}}/\hat{\pi_1}) = 7.04 + 117.12 \delta mass + 6.06 sex\)
For bugs of mass_diff x + 1/50 (0.02) g and female, the estimated odds that flew_diff = -1 rather than flew_diff = 1 equals
exp((117.12+6.06)/50)
## [1] 11.74702
times (almost 12 times) the estimated odds at mass_diff x (g).
For bugs of mass_diff x + 1/50 (0.02) g and male, the estimated odds that flew_diff = -1 rather than flew_diff = 1 equals
exp((117.12-6.06)/50)
## [1] 9.218386
times the estimated odds at mass_diff x (g).
head(pp <- fitted(delta_mass_model))
## 0 -1 1
## 1 0.9926638 0.007333222 2.928886e-06
## 2 0.9894420 0.010555888 2.087933e-06
## 3 0.9887828 0.011215207 1.973171e-06
## 4 0.9857139 0.014284503 1.573132e-06
## 5 0.9838824 0.016116154 1.404213e-06
## 6 0.9818204 0.018178338 1.253133e-06
plot(df$mass_diff, pp[,1], ylim=c(0,1), col="red", type="b", ylab="Predicted Probability", xlab="Changes in Mass From T1 to T2 (g)") # no flight_diff
points(df$mass_diff, pp[,2], col="blue", type="b") # flew in T1 but not T2
points(df$mass_diff, pp[,3], col="green", type="b") # flew in T2 but not T1
text(0.031,0.8, labels="No Flight Diff", col="red")
text(0.008,0.5, labels="Flew in T1 but not T2", col="blue")
text(0.052,0.1, labels="Flew in T2 but not T1", col="green")
# female line is always higher
text(0.0,0.96, labels="F", col="red")
text(0.0,0.7, labels="M", col="red")
text(0.0,0.25, labels="F", col="blue")
text(0.02,0.2, labels="M", col="blue")
text(-0.03,0.25, labels="F", col="green")
text(0.025,0.05, labels="M", col="green")
abline(v=0.03, lty=2, col="slategrey")
abline(v=0.035, lty=2, col="slategrey")
abline(v=0.04, lty=2, col="slategrey")
abline(v=-0.01, lty=2, col="slategrey")
abline(v=-0.045, lty=2, col="slategrey")
\(\log(\hat{\pi_{-1}}/\hat{\pi_1}) = 6.67 + 95.21 \delta mass + 5.45 sex\)
As females gain mass (need to gain more than males - 0.04 vs. 0.03 g) they are more likely to fly in T1 than T2. As females loose mass they are much more likely than males to fly in T2 than T1. But both are mor likely to have no mass flight response change.
More on multinomial plotting for when have more than 1 predictor variables: https://stats.idre.ucla.edu/r/dae/multinomial-logistic-regression/
df <- d %>%
filter(!is.na(egg_diff), !is.na(flew_diff), sex_c == 1)
df$flew_diff <- relevel(as.factor(df$flew_diff), ref = "0")
Null model:
null <- multinom(flew_diff ~ 1, data = df)
## # weights: 2 (1 variable)
## initial value 66.542129
## final value 46.327446
## converged
Egg_diff model:
model <- multinom(flew_diff ~ egg_diff, data = df)
## # weights: 3 (2 variable)
## initial value 66.542129
## iter 10 value 40.203864
## iter 10 value 40.203864
## iter 10 value 40.203864
## final value 40.203864
## converged
s <- summary(model) # multinom doesn't compute pvalues so need to do it by hand.
z <- s$coefficients/s$standard.errors # calculate p-values using Wald tests (here z-tests)
wald <- z^2
p <- (1 - pnorm(abs(z), 0, 1)) * 2
table <- cbind(s$coefficients, c(s$edf, s$edf), s$standard.errors[2], z[2], wald[2], p[2])
colnames(table) <- c(" Estimate","DF", "Std. Err.", "z", "wald", "P > |z|")
cat("\n")
cat("AIC: ", s$AIC, "\n")
## AIC: 84.40773
table
## Estimate DF Std. Err. z wald P > |z|
## (Intercept) -2.219471 2 0.5486809 3.234434 10.46156 0.00121884
## egg_diff 1.774672 2 0.5486809 3.234434 10.46156 0.00121884
anova(null, model, test="Chisq") # adding egg_diff improves fit
## Likelihood ratio tests of Multinomial Models
##
## Response: flew_diff
## Model Resid. df Resid. Dev Test Df LR stat. Pr(Chi)
## 1 1 -1 92.65489
## 2 egg_diff -2 80.40773 1 vs 2 1 12.24716 0.0004659658
The ML prediction equations are:
\(\log(\hat{\pi_{-1}}/\hat{\pi_0}) = -2.22 + 1.78 x\) and that’s it. Why? Because these comparisons have been reduced to a binomial situation. There are no flight responses = 1. None of the female bugs Fly in T2 but not T1. So we can do our regular binomial comparisons.
# stopped here
# Remove any missing masses
df <- df[!is.na(df$mass_diff),]
# Change the -1's to 1's, but still need to keep in mind 1 will then mean that all the bugs flew in T1 but not T2
df$flew_diff <- abs(as.integer(df$flew_diff)) - 1
df$flew_diff <- as.factor(df$flew_diff)
R = df$flew_diff
A = df$egg_diff
B = df$mass_diff
#C = df$sym_dist
C = df$host_c
data<-data.frame(R, A, B, C)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 73.28097 73.75454 74.48026 75.28029 75.42404 75.67354 75.73601
## models 7 12 13 11 16 6 14
## probs 0.210355 0.1660043 0.1154862 0.07741166 0.07204284 0.06359357 0.06163784
##
## m7 glm(formula = R ~ A + B + C, family = binomial, data = data)
## m12 glm(formula = R ~ A * C + B, family = binomial, data = data)
## m13 glm(formula = R ~ B * C + A, family = binomial, data = data)
## m11 glm(formula = R ~ A * B + C, family = binomial, data = data)
## m16 glm(formula = R ~ A * C + B * C, family = binomial, data = data)
## m6 glm(formula = R ~ B + C, family = binomial, data = data)
## m14 glm(formula = R ~ A * B + A * C, family = binomial, data = data)
anova(m7, m12, test="Chisq") # Adding A*C does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A + B + C
## Model 2: R ~ A * C + B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 89 65.281
## 2 88 63.755 1 1.5264 0.2166
anova(m6, m7, test="Chisq") # Addin A does improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ B + C
## Model 2: R ~ A + B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 90 69.674
## 2 89 65.281 1 4.3926 0.0361 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
delta_egg_model <- glm(flew_diff ~ egg_diff + mass_diff + host_c, family = binomial, data = df)
tidy_regression(delta_egg_model, is_color = output_col)
## glm flew_diff ~ egg_diff + mass_diff + host_c binomial df
## AIC: 73.28097
## (Intercept) coeff: -2.1794225 Pr(>|t|): 2.381079e-06 *
## egg_diff coeff: 1.3447603 Pr(>|t|): 0.03846687 *
## mass_diff coeff: 40.2176192 Pr(>|t|): 0.02047179 *
## host_c coeff: 0.7246695 Pr(>|t|): 0.02731218 *
gf_point(flew_diff ~ mass_diff, data=df, col=~egg_diff)
delta_mass_model
## Call:
## multinom(formula = flew_diff ~ mass_diff + sex_c, data = df)
##
## Coefficients:
## (Intercept) mass_diff sex_c
## -1 -1.863449 61.25330 -0.2881294
## 1 -8.900205 -55.86612 -6.3472957
##
## Residual Deviance: 321.1306
## AIC: 333.1306
Look within the ‘Delta flight response’ tab to see the making of the graphs generated from this model. Here is the graph:
tidy_regression(delta_egg_model, is_color = output_col)
## glm flew_diff ~ egg_diff + mass_diff + host_c binomial df
## AIC: 73.28097
## (Intercept) coeff: -2.1794225 Pr(>|t|): 2.381079e-06 *
## egg_diff coeff: 1.3447603 Pr(>|t|): 0.03846687 *
## mass_diff coeff: 40.2176192 Pr(>|t|): 0.02047179 *
## host_c coeff: 0.7246695 Pr(>|t|): 0.02731218 *
Testing glmer() with pop and chamber
Cleaning the Data
rm(list=ls())
output_col = FALSE
source("src/clean_flight_data.R")
source("src/regression_output.R")
source("src/center_flight_data.R")
source("get_warnings.R")
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
data_T1 <-data_tested[data_tested$trial_type=="T1",]
data_T1 <- center_data(data_T1)
Experimental Set-Up Effects
## glm flew_b ~ chamber binomial data_T1
## AIC: 458.9055
## (Intercept) coeff: 0.2876821 Pr(>|t|): 0.5942526
## chamberA-2 coeff: -0.0966268 Pr(>|t|): 0.8766875
## chamberA-3 coeff: 0.117783 Pr(>|t|): 0.8474768
## chamberA-4 coeff: 0.4054651 Pr(>|t|): 0.5053931
## chamberB-1 coeff: 0.0918075 Pr(>|t|): 0.8875092
## chamberB-2 coeff: 0.2130932 Pr(>|t|): 0.7267933
## chamberB-3 coeff: -0.074108 Pr(>|t|): 0.9040261
## chamberB-4 coeff: 0.3254224 Pr(>|t|): 0.6114074
## glm flew_b ~ days_from_start_c binomial data_T1
## AIC: 448.1122
## (Intercept) coeff: 0.4298329 Pr(>|t|): 0.0001338893 *
## days_from_start_c coeff: -0.0339142 Pr(>|t|): 0.2615186
## glm flew_b ~ min_from_IncStart binomial data_T1
## AIC: 449.1128
## (Intercept) coeff: 0.360952 Pr(>|t|): 0.03534992 *
## min_from_IncStart coeff: 0.0004162 Pr(>|t|): 0.6064583
Biological Effects
## glm flew_b ~ mass_c binomial data_T1
## AIC: 401.7318
## (Intercept) coeff: 0.4447371 Pr(>|t|): 0.0002377227 *
## mass_c coeff: -33.2650205 Pr(>|t|): 6.742316e-09 *
Morphology Effects
## glm flew_b ~ beak_c binomial data_T1
## AIC: 438.1289
## (Intercept) coeff: 0.4391226 Pr(>|t|): 0.0001235556 *
## beak_c coeff: -0.368997 Pr(>|t|): 0.0009363177 *
## glm flew_b ~ thorax_c binomial data_T1
## AIC: 441.4424
## (Intercept) coeff: 0.4374127 Pr(>|t|): 0.0001215489 *
## thorax_c coeff: -1.0468489 Pr(>|t|): 0.005435159 *
## glm flew_b ~ body_c binomial data_T1
## AIC: 447.0143
## (Intercept) coeff: 0.4311034 Pr(>|t|): 0.000131644 *
## body_c coeff: -0.1613906 Pr(>|t|): 0.1253027
## glm flew_b ~ wing_c binomial data_T1
## AIC: 449.2264
## (Intercept) coeff: 0.4283101 Pr(>|t|): 0.0001371065 *
## wing_c coeff: -0.053208 Pr(>|t|): 0.6957995
Glmer() Testing Notes
(1|chamber) as a random effect does not need to be included in the Trial 1 glmer() analyses because in all cases the variance is essentially 0. What pops up if you do try glmer on the best fit model? A singular fit error is raised: “When you obtain a singular fit, this is often indicating that the model is overfitted – that is, the random effects structure is too complex to be supported by the data, which naturally leads to the advice to remove the most complex part of the random effects structure (usually random slopes). The benefit of this approach is that it leads to a more parsimonious (conservative) model that is not over-fitted.” Source: https://stats.stackexchange.com/questions/378939/dealing-with-singular-fit-in-mixed-models
However, (1|population) and (1|days_from_start_c) does matter when doing the morphology analyses in Trial 1, and will not throw an error because the variance is large enough to change the fit of a model.
# Remove any missing masses
data_T1 <- data_T1 %>%
filter(!is.na(mass))
data_T1 <- center_data(data_T1)
R = data_T1$flew_b
A = data_T1$host_c
B = data_T1$sex_c
C = data_T1$sym_dist
D = data_T1$mass_c
data<-data.frame(R, A, B, C, D)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 405.1863 407.0964 407.1841 407.7049 408.2027 408.3016
## models 8 2 11 15 4 6
## probs 0.337383 0.1298202 0.1242531 0.09576901 0.07466518 0.07106273
##
## m8 glm(formula = R ~ A * B, family = binomial, data = data)
## m2 glm(formula = R ~ B, family = binomial, data = data)
## m11 glm(formula = R ~ A * B + C, family = binomial, data = data)
## m15 glm(formula = R ~ A * B + B * C, family = binomial, data = data)
## m4 glm(formula = R ~ A + B, family = binomial, data = data)
## m6 glm(formula = R ~ B + C, family = binomial, data = data)
anova(m8, m11, test="Chisq") ## Adding C does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A * B
## Model 2: R ~ A * B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 325 397.19
## 2 324 397.18 1 0.0022039 0.9626
anova(m11, m15, test="Chisq") ## Adding B*C interaction does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A * B + C
## Model 2: R ~ A * B + B * C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 324 397.18
## 2 323 395.70 1 1.4792 0.2239
anova(m11, m14, test="Chisq") ## Adding A*C interaction does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A * B + C
## Model 2: R ~ A * B + A * C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 324 397.18
## 2 323 397.16 1 0.023286 0.8787
nomass_T1<-glm(flew_b~sex_c*host_c, family=binomial, data=data_T1)
tidy_regression(nomass_T1, is_color=output_col)
## glm flew_b ~ sex_c * host_c binomial data_T1
## AIC: 405.1863
## (Intercept) coeff: 0.2373995 Pr(>|t|): 0.07673066 .
## sex_c coeff: -0.6108252 Pr(>|t|): 5.260333e-06 *
## host_c coeff: -0.0537428 Pr(>|t|): 0.6886482
## sex_c:host_c coeff: 0.3020053 Pr(>|t|): 0.02434372 *
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 4-FF.R")
## [,1] [,2] [,3] [,4]
## AICs 395.8828 396.098 396.5748 397.0146
## models 51 63 26 18
## probs 0.09542866 0.08569499 0.06751641 0.05419081
##
## m51 glm(formula = R ~ A * D + C * D, family = binomial, data = data)
## m63 glm(formula = R ~ A * D + C * D + B, family = binomial, data = data)
## m26 glm(formula = R ~ A * D + B, family = binomial, data = data)
## m18 glm(formula = R ~ A * D, family = binomial, data = data)
R = data_T1$flew_b
A = data_T1$host_c
B = data_T1$sex_c
C = data_T1$mass_c
data<-data.frame(R, A, B, C)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")
## [,1] [,2] [,3] [,4] [,5]
## AICs 396.5748 397.0146 398.4673 398.5139 398.5501
## models 12 9 14 11 16
## probs 0.2662013 0.2136616 0.103341 0.100959 0.09914754
##
## m12 glm(formula = R ~ A * C + B, family = binomial, data = data)
## m9 glm(formula = R ~ A * C, family = binomial, data = data)
## m14 glm(formula = R ~ A * B + A * C, family = binomial, data = data)
## m11 glm(formula = R ~ A * B + C, family = binomial, data = data)
## m16 glm(formula = R ~ A * C + B * C, family = binomial, data = data)
anova(m9, m12, test="Chisq") # Adding B does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A * C
## Model 2: R ~ A * C + B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 325 389.01
## 2 324 386.57 1 2.4397 0.1183
anova(m51, m63, test="Chisq") # Adding B does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A * D + C * D
## Model 2: R ~ A * D + C * D + B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 323 383.88
## 2 322 382.10 1 1.7848 0.1816
anova(m27, m51, test="Chisq") # Adding C*D does improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A * D + C
## Model 2: R ~ A * D + C * D
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 324 389.01
## 2 323 383.88 1 5.1242 0.02359 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# top model if don't include sym_dist
mass_T1_nosym <- glm(flew_b~host_c*mass_c, family=binomial, data=data_T1)
tidy_regression(mass_T1_nosym, is_color=output_col)
## glm flew_b ~ host_c * mass_c binomial data_T1
## AIC: 397.0146
## (Intercept) coeff: 0.3972549 Pr(>|t|): 0.002534805 *
## host_c coeff: -0.1665608 Pr(>|t|): 0.2055597
## mass_c coeff: -28.0261972 Pr(>|t|): 4.143597e-06 *
## host_c:mass_c coeff: 15.6704317 Pr(>|t|): 0.01004477 *
# top model if you do include sym_dist
mass_T1_sym<- glm(flew_b~host_c*mass_c + sym_dist*mass_c, family=binomial, data=data_T1)
tidy_regression(mass_T1_sym, is_color=output_col)
## glm flew_b ~ host_c * mass_c + sym_dist * mass_c binomial data_T1
## AIC: 395.8828
## (Intercept) coeff: 0.4352287 Pr(>|t|): 0.04304544 *
## host_c coeff: -0.1640368 Pr(>|t|): 0.3508711
## mass_c coeff: -15.1464735 Pr(>|t|): 0.07961529 .
## sym_dist coeff: -0.1084774 Pr(>|t|): 0.5151625
## host_c:mass_c coeff: 22.7792912 Pr(>|t|): 0.001430864 *
## mass_c:sym_dist coeff: -17.3605309 Pr(>|t|): 0.04408037 *
data_T1 <-data_tested[data_tested$trial_type=="T1",]
data_T1 <- data_T1 %>%
filter(!is.na(body))
data_T1 <- center_data(data_T1)
R = data_T1$flew_b
A = data_T1$thorax_c
B = data_T1$body_c
C = data_T1$wing_c
data<-data.frame(R, A, B, C)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 420.4885 421.1253 421.4696 421.5025 421.8375 421.9204 422.082
## models 13 15 7 12 14 16 11
## probs 0.1864269 0.1355936 0.1141476 0.1122827 0.09496689 0.09111367 0.08404131
## [,8]
## AICs 422.6621
## models 17
## probs 0.06288207
##
## m13 glm(formula = R ~ B * C + A, family = binomial, data = data)
## m15 glm(formula = R ~ A * B + B * C, family = binomial, data = data)
## m7 glm(formula = R ~ A + B + C, family = binomial, data = data)
## m12 glm(formula = R ~ A * C + B, family = binomial, data = data)
## m14 glm(formula = R ~ A * B + A * C, family = binomial, data = data)
## m16 glm(formula = R ~ A * C + B * C, family = binomial, data = data)
## m11 glm(formula = R ~ A * B + C, family = binomial, data = data)
## m17 glm(formula = R ~ A * B + A * C + B * C, family = binomial, data = data)
anova(m7, m13, test="Chisq") # Adding B*C marginally improves fit
## Analysis of Deviance Table
##
## Model 1: R ~ A + B + C
## Model 2: R ~ B * C + A
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 328 413.47
## 2 327 410.49 1 2.9811 0.08424 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m13, m16, test="Chisq") # Adding A*C does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ B * C + A
## Model 2: R ~ A * C + B * C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 327 410.49
## 2 326 409.92 1 0.56814 0.451
morph_T1 <- glm(flew_b ~ body_c * wing_c + thorax_c, family = binomial, data = data_T1)
tidy_regression(morph_T1, is_color=output_col)
## glm flew_b ~ body_c * wing_c + thorax_c binomial data_T1
## AIC: 420.4885
## (Intercept) coeff: 0.6086248 Pr(>|t|): 2.775207e-05 *
## body_c coeff: -1.1762506 Pr(>|t|): 0.06266837 .
## wing_c coeff: 2.350309 Pr(>|t|): 0.0004808155 *
## thorax_c coeff: -2.6772762 Pr(>|t|): 0.01747959 *
## body_c:wing_c coeff: -0.1698641 Pr(>|t|): 0.08756481 .
maringal negative effect of body wher ethe larger the body the less likely to fly
strong positive effect of wing length where the longer the wing, the more likely to fly
storng negative effect of thorax where the larger the thorasxs, the less likely to fly
no effect of body*wing (we know body and wing are closely related so they could be canceling each other out (b/c body is a proxi for mass))
morph_T1glmer <- glmer(formula = flew_b ~ body_c * wing_c + thorax_c + (1| population), family = binomial, data = data_T1)
tidy_regression(morph_T1glmer, is_color=output_col) # did change the coefficients slightly, but not a better fit
## glmer flew_b ~ body_c * wing_c + thorax_c + (1 | population) data_T1 binomial
## AIC: 421.8532 421.8532
## (Intercept) coeff: 0.5335423 Pr(>|t|): 0.005217949 *
## body_c coeff: -1.2372535 Pr(>|t|): 0.05374932 .
## wing_c coeff: 2.4704559 Pr(>|t|): 0.0003676219 *
## thorax_c coeff: -2.792138 Pr(>|t|): 0.01517357 *
## body_c:wing_c coeff: -0.1651854 Pr(>|t|): 0.1002469
R = data_T1$flew_b
A = data_T1$thorax_c
B = data_T1$wing2body_c
data<-data.frame(R, A, B)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 2-FF.R")
## [,1] [,2]
## AICs 419.9404 420.2258
## models 4 3
## probs 0.531513 0.4608213
##
## m4 glm(formula = R ~ A * B, family = binomial, data = data)
## m3 glm(formula = R ~ A + B, family = binomial, data = data)
anova(m3, m4, test="Chisq") # Adding A*B does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A + B
## Model 2: R ~ A * B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 329 414.23
## 2 328 411.94 1 2.2854 0.1306
morph_T1_ratio <- glm(flew_b ~ wing2body_c + thorax_c, family = binomial, data = data_T1)
tidy_regression(morph_T1_ratio, is_color=output_col)
## glm flew_b ~ wing2body_c + thorax_c binomial data_T1
## AIC: 420.2258
## (Intercept) coeff: 0.4670867 Pr(>|t|): 8.08552e-05 *
## wing2body_c coeff: 32.6671543 Pr(>|t|): 6.925235e-06 *
## thorax_c coeff: -1.2219033 Pr(>|t|): 0.001695651 *
tidy_regression(nomass_T1, is_color=output_col)
## glm flew_b ~ sex_c * host_c binomial data_T1
## AIC: 405.1863
## (Intercept) coeff: 0.2373995 Pr(>|t|): 0.07673066 .
## sex_c coeff: -0.6108252 Pr(>|t|): 5.260333e-06 *
## host_c coeff: -0.0537428 Pr(>|t|): 0.6886482
## sex_c:host_c coeff: 0.3020053 Pr(>|t|): 0.02434372 *
tidy_regression(mass_T1_sym, is_color=output_col)
## glm flew_b ~ host_c * mass_c + sym_dist * mass_c binomial data_T1
## AIC: 395.8828
## (Intercept) coeff: 0.4352287 Pr(>|t|): 0.04304544 *
## host_c coeff: -0.1640368 Pr(>|t|): 0.3508711
## mass_c coeff: -15.1464735 Pr(>|t|): 0.07961529 .
## sym_dist coeff: -0.1084774 Pr(>|t|): 0.5151625
## host_c:mass_c coeff: 22.7792912 Pr(>|t|): 0.001430864 *
## mass_c:sym_dist coeff: -17.3605309 Pr(>|t|): 0.04408037 *
tidy_regression(mass_T1_nosym, is_color=output_col)
## glm flew_b ~ host_c * mass_c binomial data_T1
## AIC: 397.0146
## (Intercept) coeff: 0.3972549 Pr(>|t|): 0.002534805 *
## host_c coeff: -0.1665608 Pr(>|t|): 0.2055597
## mass_c coeff: -28.0261972 Pr(>|t|): 4.143597e-06 *
## host_c:mass_c coeff: 15.6704317 Pr(>|t|): 0.01004477 *
tidy_regression(morph_T1, is_color=output_col) # glmer did not improve fit
## glm flew_b ~ body_c * wing_c + thorax_c binomial data_T1
## AIC: 420.4885
## (Intercept) coeff: 0.6086248 Pr(>|t|): 2.775207e-05 *
## body_c coeff: -1.1762506 Pr(>|t|): 0.06266837 .
## wing_c coeff: 2.350309 Pr(>|t|): 0.0004808155 *
## thorax_c coeff: -2.6772762 Pr(>|t|): 0.01747959 *
## body_c:wing_c coeff: -0.1698641 Pr(>|t|): 0.08756481 .
tidy_regression(morph_T1_ratio, is_color=output_col)
## glm flew_b ~ wing2body_c + thorax_c binomial data_T1
## AIC: 420.2258
## (Intercept) coeff: 0.4670867 Pr(>|t|): 8.08552e-05 *
## wing2body_c coeff: 32.6671543 Pr(>|t|): 6.925235e-06 *
## thorax_c coeff: -1.2219033 Pr(>|t|): 0.001695651 *
Testing glmer() with pop and chamber
Cleaning the Data
rm(list=ls())
output_col = FALSE
source("src/clean_flight_data.R")
source("src/regression_output.R")
source("src/center_flight_data.R")
source("get_warnings.R")
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
data_T2 <-data_tested[data_tested$trial_type=="T2",]
data_T2 <- center_data(data_T2)
Experimental Set-Up Effects
## glm flew_b ~ chamber binomial data_T2
## AIC: 393.888
## (Intercept) coeff: -0.0909718 Pr(>|t|): 0.7631039
## chamberA-2 coeff: 0.3273606 Pr(>|t|): 0.4754205
## chamberA-3 coeff: 0.784119 Pr(>|t|): 0.1224819
## chamberA-4 coeff: -0.2837217 Pr(>|t|): 0.488549
## chamberB-1 coeff: 0.784119 Pr(>|t|): 0.1559208
## chamberB-2 coeff: 0.6017974 Pr(>|t|): 0.2039747
## chamberB-3 coeff: -0.468644 Pr(>|t|): 0.3199648
## chamberB-4 coeff: -0.1809619 Pr(>|t|): 0.6866405
## glm flew_b ~ days_from_start_c binomial data_T2
## AIC: 393.3056
## (Intercept) coeff: 0.014415 Pr(>|t|): 0.9039395
## days_from_start_c coeff: -0.0540142 Pr(>|t|): 0.2070645
## glm flew_b ~ min_from_IncStart binomial data_T2
## AIC: 394.6235
## (Intercept) coeff: -0.0779168 Pr(>|t|): 0.7062456
## min_from_IncStart coeff: 0.0004647 Pr(>|t|): 0.5857749
Biological Effects
## glm flew_b ~ mass_c binomial data_T2
## AIC: 358.9575
## (Intercept) coeff: -0.0288154 Pr(>|t|): 0.8227357
## mass_c coeff: -33.6659527 Pr(>|t|): 1.030843e-07 *
Morphology Effects
## glm flew_b ~ beak_c binomial data_T2
## AIC: 380.4646
## (Intercept) coeff: 0.0069373 Pr(>|t|): 0.9547796
## beak_c coeff: -0.468637 Pr(>|t|): 0.0002555501 *
## glm flew_b ~ thorax_c binomial data_T2
## AIC: 381.7558
## (Intercept) coeff: 0.0109082 Pr(>|t|): 0.9287235
## thorax_c coeff: -1.49497 Pr(>|t|): 0.0004698825 *
## glm flew_b ~ body_c binomial data_T2
## AIC: 387.3314
## (Intercept) coeff: 0.0133229 Pr(>|t|): 0.9121223
## body_c coeff: -0.3168907 Pr(>|t|): 0.006959575 *
## glm flew_b ~ wing_c binomial data_T2
## AIC: 392.1204
## (Intercept) coeff: 0.014232 Pr(>|t|): 0.905352
## wing_c coeff: -0.2472874 Pr(>|t|): 0.09699422 .
Glmer() Testing Notes
(1|population) + (1|chamber) as random effects do not need to be included in the Trial 2 glmer() mass and no mass analyses because in all cases the variance is essentially 0. What pops up if you do try glmer on the best fit model? A singular fit error is raised. Refer to the reasoning in Trial 1.
However, no error is thrown when included in the morphology analyses.
R = data_T2$flew_b
A = data_T2$host_c
B = data_T2$sex_c
C = data_T2$sym_dist
D = data_T2$mass_c # as a covariate
data<-data.frame(R, A, B, C, D)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 361.1644 362.5404 363.0369 364.4845 364.5336 364.8156
## models 2 4 6 7 8 10
## probs 0.3641601 0.1830193 0.142783 0.06923625 0.06756002 0.05867243
##
## m2 glm(formula = R ~ B, family = binomial, data = data)
## m4 glm(formula = R ~ A + B, family = binomial, data = data)
## m6 glm(formula = R ~ B + C, family = binomial, data = data)
## m7 glm(formula = R ~ A + B + C, family = binomial, data = data)
## m8 glm(formula = R ~ A * B, family = binomial, data = data)
## m10 glm(formula = R ~ B * C, family = binomial, data = data)
anova(m2, m4, test="Chisq") # Adding A does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ B
## Model 2: R ~ A + B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 280 357.16
## 2 279 356.54 1 0.624 0.4296
anova(m2, m6, test="Chisq") # Adding C does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ B
## Model 2: R ~ B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 280 357.16
## 2 279 357.04 1 0.12746 0.7211
nomass_T2 <-glm(flew_b~sex_c, family=binomial, data=data_T2)
tidy_regression(nomass_T2, is_color=output_col)
## glm flew_b ~ sex_c binomial data_T2
## AIC: 361.1644
## (Intercept) coeff: -0.2425498 Pr(>|t|): 0.07779975 .
## sex_c coeff: -0.7620335 Pr(>|t|): 3.010899e-08 *
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 4-FF.R")
## [,1] [,2] [,3] [,4]
## AICs 358.8454 358.9575 359.2153 359.563
## models 9 4 7 12
## probs 0.08891803 0.08406952 0.0739026 0.06210944
##
## m9 glm(formula = R ~ B + D, family = binomial, data = data)
## m4 glm(formula = R ~ D, family = binomial, data = data)
## m7 glm(formula = R ~ A + D, family = binomial, data = data)
## m12 glm(formula = R ~ A + B + D, family = binomial, data = data)
anova(m4, m7, test="Chisq") # Adding A does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ D
## Model 2: R ~ A + D
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 280 354.96
## 2 279 353.22 1 1.7422 0.1869
anova(m7, m12, test="Chisq") # Adding B does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A + D
## Model 2: R ~ A + B + D
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 279 353.22
## 2 278 351.56 1 1.6523 0.1986
anova(m4, m9, test="Chisq") # Adding B does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ D
## Model 2: R ~ B + D
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 280 354.96
## 2 279 352.85 1 2.1121 0.1461
mass_T2 <-glm(flew_b~mass_c, family=binomial, data=data_T2)
tidy_regression(mass_T2, is_color=output_col)
## glm flew_b ~ mass_c binomial data_T2
## AIC: 358.9575
## (Intercept) coeff: -0.0288154 Pr(>|t|): 0.8227357
## mass_c coeff: -33.6659527 Pr(>|t|): 1.030843e-07 *
Mass is really dominating everything, so let’s split by sex after looking at Trial 2.
# Remove any missing masses and morphology measurements
data_T2 <- data_T2 %>%
filter(!is.na(mass)) %>%
filter(!is.na(body))
data_T2 <- center_data(data_T2)
R = data_T2$flew_b
A = data_T2$thorax_c
B = data_T2$body_c
C = data_T2$wing_c
data<-data.frame(R, A, B, C)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")
## [,1] [,2] [,3] [,4]
## AICs 364.316 365.2947 366.2914 368.3788
## models 16 15 17 13
## probs 0.4230217 0.2593179 0.1575417 0.05547893
##
## m16 glm(formula = R ~ A * C + B * C, family = binomial, data = data)
## m15 glm(formula = R ~ A * B + B * C, family = binomial, data = data)
## m17 glm(formula = R ~ A * B + A * C + B * C, family = binomial, data = data)
## m13 glm(formula = R ~ B * C + A, family = binomial, data = data)
anova(m13, m16, test="Chisq") # Adding A*C improves fit
## Analysis of Deviance Table
##
## Model 1: R ~ B * C + A
## Model 2: R ~ A * C + B * C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 277 358.38
## 2 276 352.32 1 6.0628 0.01381 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
morph_T2 <- glm(flew_b ~ thorax_c * wing_c + body_c * wing_c, family = binomial, data = data_T2)
tidy_regression(morph_T2, is_color=output_col)
## glm flew_b ~ thorax_c * wing_c + body_c * wing_c binomial data_T2
## AIC: 364.316
## (Intercept) coeff: 0.1914188 Pr(>|t|): 0.2140046
## thorax_c coeff: -2.3082152 Pr(>|t|): 0.0828982 .
## wing_c coeff: 1.9063552 Pr(>|t|): 0.008518188 *
## body_c coeff: -1.1472691 Pr(>|t|): 0.09990423 .
## thorax_c:wing_c coeff: 3.692847 Pr(>|t|): 0.0199121 *
## wing_c:body_c coeff: -1.216138 Pr(>|t|): 0.006636944 *
negative marginal effect of thorax, where the wider the thorax, the then bug less likely to fly
positive effect of wing length, where if wing longer then bug more likely to fly
negative effect of body, where the longer the body the less likely the bug is to fly
positive effect of thorax:wing where the wider the thorax and wing the more likely the bug is to fly (this encapsulates overall flight power in a way b/c thorax muscles and wing length)
negative effect of wing and body where the longer the wing and body the less likely the bug is to fly (this encapsulates mass limitations)
R = data_T2$flew_b
A = data_T2$thorax_c
B = data_T2$wing2body_c
data<-data.frame(R, A, B)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 2-FF.R")
## [,1] [,2]
## AICs 367.6219 368.6156
## models 3 4
## probs 0.6206869 0.3776612
##
## m3 glm(formula = R ~ A + B, family = binomial, data = data)
## m4 glm(formula = R ~ A * B, family = binomial, data = data)
anova(m3, m4, test="Chisq") # Adding A*B does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A + B
## Model 2: R ~ A * B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 279 361.62
## 2 278 360.62 1 1.0063 0.3158
morph_T2_ratio <- glm(flew_b ~ thorax_c + wing2body_c, family = binomial, data = data_T2)
tidy_regression(morph_T2_ratio, is_color=output_col)
## glm flew_b ~ thorax_c + wing2body_c binomial data_T2
## AIC: 367.6219
## (Intercept) coeff: 0.0109675 Pr(>|t|): 0.9303663
## thorax_c coeff: -1.6132431 Pr(>|t|): 0.0002366193 *
## wing2body_c coeff: 28.5706239 Pr(>|t|): 0.0001653885 *
tidy_regression(nomass_T2, is_color=output_col)
## glm flew_b ~ sex_c binomial data_T2
## AIC: 361.1644
## (Intercept) coeff: -0.2425498 Pr(>|t|): 0.07779975 .
## sex_c coeff: -0.7620335 Pr(>|t|): 3.010899e-08 *
tidy_regression(mass_T2, is_color=output_col)
## glm flew_b ~ mass_c binomial data_T2
## AIC: 358.9575
## (Intercept) coeff: -0.0288154 Pr(>|t|): 0.8227357
## mass_c coeff: -33.6659527 Pr(>|t|): 1.030843e-07 *
tidy_regression(morph_T2, is_color=output_col) # glmer did not improve fit
## glm flew_b ~ thorax_c * wing_c + body_c * wing_c binomial data_T2
## AIC: 364.316
## (Intercept) coeff: 0.1914188 Pr(>|t|): 0.2140046
## thorax_c coeff: -2.3082152 Pr(>|t|): 0.0828982 .
## wing_c coeff: 1.9063552 Pr(>|t|): 0.008518188 *
## body_c coeff: -1.1472691 Pr(>|t|): 0.09990423 .
## thorax_c:wing_c coeff: 3.692847 Pr(>|t|): 0.0199121 *
## wing_c:body_c coeff: -1.216138 Pr(>|t|): 0.006636944 *
tidy_regression(morph_T2_ratio, is_color=output_col)
## glm flew_b ~ thorax_c + wing2body_c binomial data_T2
## AIC: 367.6219
## (Intercept) coeff: 0.0109675 Pr(>|t|): 0.9303663
## thorax_c coeff: -1.6132431 Pr(>|t|): 0.0002366193 *
## wing2body_c coeff: 28.5706239 Pr(>|t|): 0.0001653885 *
Cleaning the Data
rm(list=ls())
output_col = FALSE
source("src/clean_flight_data.R")
source("src/regression_output.R")
source("src/center_flight_data.R")
source("get_warnings.R")
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
d <- data_tested %>%
group_by(ID, sex,population, site, host_plant, latitude, longitude, total_eggs,
beak, thorax, wing, body, w_morph, morph_notes, tested,
host_c, sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c,
beak_c, thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c, wing2body_s) %>%
summarise_all(funs(list(na.omit(.))))
d$num_flew <- 0
d$num_notflew <- 0
d$average_mass <- 0
for(row in 1:length(d$flew_b)){
n_flew <- sum(d$flew_b[[row]] == 1) # total number of times did fly among trails
d$num_flew[[row]] <- n_flew
n_notflew <- sum(d$flew_b[[row]] == 0) # total number of times did not fly among trails
d$num_notflew[[row]] <- n_notflew
avg_mass <- mean(d$mass[[row]])
d$average_mass[[row]] <- avg_mass
}
d <- select(d, -filename, -channel_letter, -set_number)
data_fem <- d[d$sex=="F",]
data_fem <- center_data(data_fem, is_not_binded = FALSE)
data_fem
## # A tibble: 119 x 63
## # Groups: ID, sex, population, site, host_plant, latitude, longitude,
## # total_eggs, beak, thorax, wing, body, w_morph, morph_notes, tested, host_c,
## # sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, beak_c,
## # thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c [119]
## ID sex population site host_plant latitude longitude total_eggs beak
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 5 F Plantatio… Areg… C. corind… 25.0 -80.6 209 7.47
## 2 7 F Plantatio… Foun… C. corind… 25.0 -80.6 8 8.19
## 3 8 F Plantatio… Foun… C. corind… 25.0 -80.6 92 8.39
## 4 12 F Key Largo KLMRL C. corind… 25.1 -80.4 329 8.79
## 5 13 F Key Largo KLMRL C. corind… 25.1 -80.4 296 6.99
## 6 16 F Key Largo JP C. corind… 25.1 -80.4 1 6.33
## 7 19 F Key Largo JP C. corind… 25.1 -80.4 NA 7.54
## 8 20 F Key Largo JP C. corind… 25.1 -80.4 55 8.73
## 9 25 F North Key… DD f… C. corind… 25.3 -80.3 NA 8.03
## 10 26 F North Key… Char… C. corind… 25.2 -80.3 33 7.15
## # … with 109 more rows, and 54 more variables: thorax <dbl>, wing <dbl>,
## # body <dbl>, w_morph <fct>, morph_notes <fct>, tested <fct>, host_c <dbl>,
## # sex_c <dbl>, w_morph_c <dbl>, lat_c <dbl>, sym_dist <dbl>,
## # sym_dist_s <dbl>, total_eggs_c <dbl>, beak_c <dbl>, thorax_c <dbl>,
## # thorax_s <dbl>, body_c <dbl>, wing_c <dbl>, wing2body <dbl>,
## # wing2body_c <dbl>, wing2body_s <dbl>, trial_type <list>, chamber <list>,
## # average_speed <list>, total_flight_time <list>, distance <list>,
## # shortest_flying_bout <list>, longest_flying_bout <list>,
## # total_duration <list>, max_speed <list>, test_date <list>,
## # time_start <list>, time_end <list>, flew <list>, flight_type <list>,
## # mass <list>, EWM <list>, flew_b <list>, eggs_b <list>,
## # minute_duration <list>, minute_duration_c <list>, min_from_IncStart <dbl>,
## # min_from_IncStart_c <dbl>, min_from_IncStart_s <dbl>,
## # days_from_start <list>, days_from_start_c <list>, mass_c <list>,
## # mass_s <list>, average_mass <dbl>, average_mass_c <dbl>,
## # average_mass_s <dbl>, trial_type_og <list>, num_flew <dbl>,
## # num_notflew <dbl>
Experimental, Biological, and Morphological Effects
## glm cbind(num_flew, num_notflew) ~ average_mass binomial data_fem
## AIC: 244.0145
## (Intercept) coeff: 0.8199527 Pr(>|t|): 0.1567534
## average_mass coeff: -19.7615291 Pr(>|t|): 0.007607435 *
## glm cbind(num_flew, num_notflew) ~ total_eggs binomial data_fem
## AIC: 180.1734
## (Intercept) coeff: -0.000878 Pr(>|t|): 0.9971877
## total_eggs coeff: -0.0113041 Pr(>|t|): 3.560437e-05 *
## glm cbind(num_flew, num_notflew) ~ beak_c binomial data_fem
## AIC: 243.4425
## (Intercept) coeff: -0.7413431 Pr(>|t|): 7.132613e-07 *
## beak_c coeff: 0.5176271 Pr(>|t|): 0.004779854 *
## glm cbind(num_flew, num_notflew) ~ thorax_c binomial data_fem
## AIC: 249.8961
## (Intercept) coeff: -0.7144045 Pr(>|t|): 9.943685e-07 *
## thorax_c coeff: 0.7277155 Pr(>|t|): 0.1679599
## glm cbind(num_flew, num_notflew) ~ body_c binomial data_fem
## AIC: 245.9068
## (Intercept) coeff: -0.7328335 Pr(>|t|): 7.928047e-07 *
## body_c coeff: 0.3564223 Pr(>|t|): 0.01817676 *
## glm cbind(num_flew, num_notflew) ~ wing_c binomial data_fem
## AIC: 245.7877
## (Intercept) coeff: -0.7311905 Pr(>|t|): 8.381109e-07 *
## wing_c coeff: 0.4265058 Pr(>|t|): 0.01764792 *
R1 = data_fem$num_flew
R2 = data_fem$num_notflew
A = data_fem$host_c
B = data_fem$wing_c
C = data_fem$average_mass
X = data_fem$population # can't do trial type anymore
data<-data.frame(R1, R2, A, B, C, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 2-FF.R"))
## [,1] [,2] [,3] [,4]
## AICs 247.7877 248.9599 249.678 251.8143
## models 2 4 3 5
## probs 0.4678333 0.2603472 0.1818148 0.06247934
##
## m2 cbind(R1, R2) ~ B + (1 | X)
## m4 cbind(R1, R2) ~ A * B + (1 | X)
## m3 cbind(R1, R2) ~ A + B + (1 | X)
## m0 cbind(R1, R2) ~ 1 + (1 | X)
length(errors$warnings)
## [1] 0
anova(m0, m2, test="Chisq") # Adding B improves fit
## Data: data
## Models:
## m0: cbind(R1, R2) ~ 1 + (1 | X)
## m2: cbind(R1, R2) ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 2 251.81 257.37 -123.91 247.81
## m2 3 247.79 256.12 -120.89 241.79 6.0266 1 0.01409 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m2, m3, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m3: cbind(R1, R2) ~ A + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 3 247.79 256.12 -120.89 241.79
## m3 4 249.68 260.79 -120.84 241.68 0.1098 1 0.7404
nomass_fem <- glmer(cbind(num_flew, num_notflew) ~ wing_c + (1|population), data=data_fem, family=binomial)
## boundary (singular) fit: see ?isSingular
tidy_regression(nomass_fem, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing_c + (1 | population) data_fem binomial
## AIC: 247.7877 247.7877
## (Intercept) coeff: -0.7311905 Pr(>|t|): 8.382439e-07 *
## wing_c coeff: 0.4265058 Pr(>|t|): 0.01764923 *
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 230.6699 231.793 231.8642 232.3348 232.6675 233.7873 233.8316
## models 12 14 11 6 16 17 15
## probs 0.2649127 0.1510885 0.145803 0.1152337 0.0975719 0.0557406 0.05451835
## [,8]
## AICs 233.906
## models 7
## probs 0.05252892
##
## m12 cbind(R1, R2) ~ A * C + B + (1 | X)
## m14 cbind(R1, R2) ~ A * B + A * C + (1 | X)
## m11 cbind(R1, R2) ~ A * B + C + (1 | X)
## m6 cbind(R1, R2) ~ B + C + (1 | X)
## m16 cbind(R1, R2) ~ A * C + B * C + (1 | X)
## m17 cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## m15 cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m7 cbind(R1, R2) ~ A + B + C + (1 | X)
length(errors$warnings)
## [1] 0
anova(m7, m12, test="Chisq") # Adding A*C improves fit
## Data: data
## Models:
## m7: cbind(R1, R2) ~ A + B + C + (1 | X)
## m12: cbind(R1, R2) ~ A * C + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m7 5 233.91 247.80 -111.95 223.91
## m12 6 230.67 247.34 -109.33 218.67 5.2361 1 0.02212 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mass_fem <- glmer(cbind(num_flew, num_notflew) ~ host_c * average_mass + wing_c + (1|population), data=data_fem, family=binomial)
## boundary (singular) fit: see ?isSingular
tidy_regression(mass_fem, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ host_c * average_mass + wing_c + (1 | population) data_fem binomial
## AIC: 230.6699 230.6699
## (Intercept) coeff: 1.0497932 Pr(>|t|): 0.1635618
## host_c coeff: -1.6597665 Pr(>|t|): 0.01871997 *
## average_mass coeff: -22.9687138 Pr(>|t|): 0.02197948 *
## wing_c coeff: 0.8179845 Pr(>|t|): 0.0001948365 *
## host_c:average_mass coeff: 21.0529026 Pr(>|t|): 0.0219996 *
negative effect of host where if from GRT the less times a bug will fly
strong negative effect of average mass where the heavier the less times a bug will fly
positive effect of wing length where the longer the wing the more times the bug will fly
strong positive effect of host*average mass where the heavier the bug and from GRT the more times the bug will fly (I’ve seen these strong conflicting interactions a lot with host and something else)
# check for collinearity between mass and wing length
data<-data.frame(data_fem$host_c, data_fem$wing_c, data_fem$average_mass )
colnames(data) <- c("Host Plant", "Wing Length", "Average Mass")
pairs(data)
GRT bugs weigh less.
R1 = data_fem$num_flew
R2 = data_fem$num_notflew
A = data_fem$host_c
B = data_fem$wing_c
C = data_fem$average_mass
D = data_fem$total_eggs_c # can't use eggs_b anymore
X = data_fem$population
data<-data.frame(R1, R2, A, B, C, D, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1-RF + 4-FF.R"))
## [,1] [,2]
## AICs 177.1075 177.3402
## models 9 20
## probs 0.09166895 0.08160042
##
## m9 cbind(R1, R2) ~ B + D + (1 | X)
## m20 cbind(R1, R2) ~ B * D + (1 | X)
length(errors$warnings) # 83 models failed
## [1] 83
anova(m9, m20, test="Chisq") # Adding B*D does not improve fit
## Data: data
## Models:
## m9: cbind(R1, R2) ~ B + D + (1 | X)
## m20: cbind(R1, R2) ~ B * D + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m9 4 177.11 187.65 -84.554 169.11
## m20 5 177.34 190.51 -83.670 167.34 1.7673 1 0.1837
egg_model <- glmer(cbind(num_flew, num_notflew) ~ wing_c + total_eggs_c + (1|population), data=data_fem, family=binomial)
## boundary (singular) fit: see ?isSingular
tidy_regression(egg_model, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing_c + total_eggs_c + (1 | population) data_fem binomial
## AIC: 177.1075 177.1075
## (Intercept) coeff: -1.1895793 Pr(>|t|): 5.069879e-09 *
## wing_c coeff: 0.5576031 Pr(>|t|): 0.01181402 *
## total_eggs_c coeff: -0.0110275 Pr(>|t|): 4.517167e-05 *
d <- data_fem %>%
filter(!is.na(body))
d$thorax_c <- d$thorax - mean(d$thorax)
d$wing_c <- d$wing - mean(d$wing)
d$body_c <- d$body - mean(d$body)
R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$body_c
C = d$wing_c
X = d$population
data<-data.frame(R1, R2, A, B, C, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 244.8068 245.5373 246.3075 246.7777 247.4988 247.752
## models 16 15 4 17 5 13
## probs 0.2213596 0.1536284 0.1045263 0.08262519 0.05761315 0.05076343
##
## m16 cbind(R1, R2) ~ A * C + B * C + (1 | X)
## m15 cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m4 cbind(R1, R2) ~ A + B + (1 | X)
## m17 cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## m5 cbind(R1, R2) ~ A + C + (1 | X)
## m13 cbind(R1, R2) ~ B * C + A + (1 | X)
length(errors$warnings)
## [1] 0
anova(m15, m16, test="Chisq") # replacing A*B with A*C improves fit
## Data: data
## Models:
## m15: cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m16: cbind(R1, R2) ~ A * C + B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m15 7 245.54 264.99 -115.77 231.54
## m16 7 244.81 264.26 -115.40 230.81 0.7305 0 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m13, m16, test="Chisq") # Adding B*C improves fit
## Data: data
## Models:
## m13: cbind(R1, R2) ~ B * C + A + (1 | X)
## m16: cbind(R1, R2) ~ A * C + B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m13 6 247.75 264.43 -117.88 235.75
## m16 7 244.81 264.26 -115.40 230.81 4.9452 1 0.02616 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
morph_fem <- glmer(cbind(num_flew, num_notflew) ~ thorax_c * wing_c + body_c * wing_c + (1 | population), data=d, family=binomial)
## boundary (singular) fit: see ?isSingular
tidy_regression(morph_fem, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing_c + body_c * wing_c + (1 | population) d binomial
## AIC: 244.8068 244.8068
## (Intercept) coeff: -0.5721164 Pr(>|t|): 0.00361911 *
## thorax_c coeff: -3.9771602 Pr(>|t|): 0.02124539 *
## wing_c coeff: -0.1206764 Pr(>|t|): 0.896578
## body_c coeff: 1.5567268 Pr(>|t|): 0.1033466
## thorax_c:wing_c coeff: 4.5495185 Pr(>|t|): 0.03910877 *
## wing_c:body_c coeff: -1.5315868 Pr(>|t|): 0.02345094 *
strong negative effect of thorax where the wider the thorax, the less times the bug flies
no effect of wing length
no effect of body length
strong positive (marginal) effect of thorax*body where the longer the body and wider the thorax, the more times a bug flies (seems to conflict the single effects)
negative effect of of body*wing where the longer the body and wing, the less times the bug flies
R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
X = d$population
data<-data.frame(R1, R2, A, B, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 2-FF.R"))
## [,1] [,2] [,3] [,4] [,5]
## AICs 250.0524 250.5642 251.7468 251.8143 251.8961
## models 2 4 3 5 1
## probs 0.3316744 0.2567869 0.1421605 0.1374432 0.1319351
##
## m2 cbind(R1, R2) ~ B + (1 | X)
## m4 cbind(R1, R2) ~ A * B + (1 | X)
## m3 cbind(R1, R2) ~ A + B + (1 | X)
## m0 cbind(R1, R2) ~ 1 + (1 | X)
## m1 cbind(R1, R2) ~ A + (1 | X)
length(errors$warnings)
## [1] 0
anova(m0, m2, test="Chisq") # Adding B marginally improves fit
## Data: data
## Models:
## m0: cbind(R1, R2) ~ 1 + (1 | X)
## m2: cbind(R1, R2) ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 2 251.81 257.37 -123.91 247.81
## m2 3 250.05 258.39 -122.03 244.05 3.7619 1 0.05243 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m2, m3, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m3: cbind(R1, R2) ~ A + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 3 250.05 258.39 -122.03 244.05
## m3 4 251.75 262.86 -121.87 243.75 0.3056 1 0.5804
morph_fem2 <- glmer(cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population), data=d, family=binomial)
## boundary (singular) fit: see ?isSingular
tidy_regression(morph_fem2, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population) d binomial
## AIC: 250.0524 250.0524
## (Intercept) coeff: -0.7145386 Pr(>|t|): 1.121718e-06 *
## wing2body_c coeff: 16.3816527 Pr(>|t|): 0.06120233 .
tidy_regression(nomass_fem, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing_c + (1 | population) data_fem binomial
## AIC: 247.7877 247.7877
## (Intercept) coeff: -0.7311905 Pr(>|t|): 8.382439e-07 *
## wing_c coeff: 0.4265058 Pr(>|t|): 0.01764923 *
tidy_regression(mass_fem, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ host_c * average_mass + wing_c + (1 | population) data_fem binomial
## AIC: 230.6699 230.6699
## (Intercept) coeff: 1.0497932 Pr(>|t|): 0.1635618
## host_c coeff: -1.6597665 Pr(>|t|): 0.01871997 *
## average_mass coeff: -22.9687138 Pr(>|t|): 0.02197948 *
## wing_c coeff: 0.8179845 Pr(>|t|): 0.0001948365 *
## host_c:average_mass coeff: 21.0529026 Pr(>|t|): 0.0219996 *
tidy_regression(egg_model, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing_c + total_eggs_c + (1 | population) data_fem binomial
## AIC: 177.1075 177.1075
## (Intercept) coeff: -1.1895793 Pr(>|t|): 5.069879e-09 *
## wing_c coeff: 0.5576031 Pr(>|t|): 0.01181402 *
## total_eggs_c coeff: -0.0110275 Pr(>|t|): 4.517167e-05 *
tidy_regression(morph_fem, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ thorax_c * wing_c + body_c * wing_c + (1 | population) d binomial
## AIC: 244.8068 244.8068
## (Intercept) coeff: -0.5721164 Pr(>|t|): 0.00361911 *
## thorax_c coeff: -3.9771602 Pr(>|t|): 0.02124539 *
## wing_c coeff: -0.1206764 Pr(>|t|): 0.896578
## body_c coeff: 1.5567268 Pr(>|t|): 0.1033466
## thorax_c:wing_c coeff: 4.5495185 Pr(>|t|): 0.03910877 *
## wing_c:body_c coeff: -1.5315868 Pr(>|t|): 0.02345094 *
tidy_regression(morph_fem2, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population) d binomial
## AIC: 250.0524 250.0524
## (Intercept) coeff: -0.7145386 Pr(>|t|): 1.121718e-06 *
## wing2body_c coeff: 16.3816527 Pr(>|t|): 0.06120233 .
Cleaning the Data
Experimental Set-Up Effects
## glm flew_b ~ chamber binomial data_fem
## AIC: 282.7963
## (Intercept) coeff: -1.6094379 Pr(>|t|): 0.01093576 *
## chamberA-2 coeff: 0.8109302 Pr(>|t|): 0.2789959
## chamberA-3 coeff: 1.0498221 Pr(>|t|): 0.1740305
## chamberA-4 coeff: 1.0388931 Pr(>|t|): 0.1498307
## chamberB-1 coeff: 1.2417131 Pr(>|t|): 0.1053886
## chamberB-2 coeff: 0.8556661 Pr(>|t|): 0.2627736
## chamberB-3 coeff: 0.4700036 Pr(>|t|): 0.5317849
## chamberB-4 coeff: 1.3411739 Pr(>|t|): 0.06690121 .
## glm flew_b ~ days_from_start binomial data_fem
## AIC: 273.9376
## (Intercept) coeff: -0.3131225 Pr(>|t|): 0.2499367
## days_from_start coeff: -0.035226 Pr(>|t|): 0.09466742 .
## glm flew_b ~ min_from_IncStart binomial data_fem
## AIC: 276.4128
## (Intercept) coeff: -0.595335 Pr(>|t|): 0.01168785 *
## min_from_IncStart coeff: -0.0005925 Pr(>|t|): 0.5530668
Biological Effects
## glm flew_b ~ mass_c binomial data_fem
## AIC: 258.5349
## (Intercept) coeff: -0.7752137 Pr(>|t|): 6.296854e-07 *
## mass_c coeff: -25.7670872 Pr(>|t|): 0.0004286196 *
## glm flew_b ~ total_eggs_c binomial data_fem
## AIC: 203.7404
## (Intercept) coeff: -1.1924462 Pr(>|t|): 4.467475e-09 *
## total_eggs_c coeff: -0.0113041 Pr(>|t|): 3.560418e-05 *
## glm flew_b ~ eggs_b binomial data_fem
## AIC: 224.3271
## (Intercept) coeff: 0.5959834 Pr(>|t|): 0.01289674 *
## eggs_b coeff: -2.2671149 Pr(>|t|): 1.112852e-11 *
Morphology Effects
## glm flew_b ~ beak_c binomial data_fem
## AIC: 268.3958
## (Intercept) coeff: -0.7370404 Pr(>|t|): 7.970136e-07 *
## beak_c coeff: 0.5176271 Pr(>|t|): 0.004779927 *
## glm flew_b ~ thorax_c binomial data_fem
## AIC: 274.8494
## (Intercept) coeff: -0.7139042 Pr(>|t|): 1.007652e-06 *
## thorax_c coeff: 0.7277155 Pr(>|t|): 0.1679606
## glm flew_b ~ body_c binomial data_fem
## AIC: 270.8601
## (Intercept) coeff: -0.7302927 Pr(>|t|): 8.444718e-07 *
## body_c coeff: 0.3564223 Pr(>|t|): 0.01817797 *
## glm flew_b ~ wing_c binomial data_fem
## AIC: 270.741
## (Intercept) coeff: -0.7314942 Pr(>|t|): 8.319902e-07 *
## wing_c coeff: 0.4265058 Pr(>|t|): 0.01764921 *
data_fem <- data_tested[data_tested$sex=="F",]
data_fem <- data_fem %>%
filter(!is.na(mass))
data_fem <- center_data(data_fem)
R = data_fem$flew_b
A = data_fem$host_c
B = data_fem$wing_c # used to be sym_dis
C = data_fem$mass_c
data<-data.frame(R, A, B, C)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 242.0988 242.1587 243.0503 243.4488 243.7701 244.0985 244.1586
## models 6 11 12 14 7 10 15
## probs 0.2065913 0.2004959 0.1283766 0.1051884 0.08957379 0.07601309 0.07376069
##
## m6 glm(formula = R ~ B + C, family = binomial, data = data)
## m11 glm(formula = R ~ A * B + C, family = binomial, data = data)
## m12 glm(formula = R ~ A * C + B, family = binomial, data = data)
## m14 glm(formula = R ~ A * B + A * C, family = binomial, data = data)
## m7 glm(formula = R ~ A + B + C, family = binomial, data = data)
## m10 glm(formula = R ~ B * C, family = binomial, data = data)
## m15 glm(formula = R ~ A * B + B * C, family = binomial, data = data)
anova(m3, m6, test="Chisq") # Adding B does improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ C
## Model 2: R ~ B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 210 254.53
## 2 209 236.10 1 18.436 1.757e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m3, m5, test="Chisq") # Adding A does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ C
## Model 2: R ~ A + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 210 254.53
## 2 209 254.49 1 0.040648 0.8402
anova(m6, m7, test="Chisq") # Adding A does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ B + C
## Model 2: R ~ A + B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 209 236.10
## 2 208 235.77 1 0.32864 0.5665
anova(m6, m10, test="Chisq") # Adding B*C does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ B + C
## Model 2: R ~ B * C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 209 236.1
## 2 208 236.1 1 0.00032619 0.9856
## glm flew_b ~ wing_c + mass_c binomial data_fem
## AIC: 242.0988
## (Intercept) coeff: -0.8614654 Pr(>|t|): 2.942928e-07 *
## wing_c coeff: 0.8692786 Pr(>|t|): 7.878961e-05 *
## mass_c coeff: -38.2356966 Pr(>|t|): 3.658768e-06 *
R = data_fem$flew_b
A = data_fem$host_c
B = data_fem$mass_c
C = data_fem$wing_c # replaced sym_dist
X = data_fem$ID # variance is zero if use trial_type | 3 models fail to converge if use ID
data<-data.frame(R, A, B, C, X)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glmer 1 RF + 3-FF.R")
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.29003 (tol = 0.001, component 1)
## [,1] [,2] [,3] [,4] [,5]
## AICs 202.6123 203.7209 206.0113 206.1216 206.2997
## models 14 17 6 11 18
## probs 0.3725183 0.2140005 0.06808972 0.06443456 0.05894485
##
## m14 R ~ A * B + A * C + (1 | X)
## m17 R ~ A * B + A * C + B * C + (1 | X)
## m6 R ~ B + C + (1 | X)
## m11 R ~ A * B + C + (1 | X)
## m18 R ~ A * B * C + (1 | X)
# top model if use ID:
multi_glmer_fem <-glmer(flew_b~host_c * mass_c + host_c * wing_c + (1 | ID), family=binomial, data=data_fem)
tidy_regression(multi_glmer_fem, is_color=output_col)
## glmer flew_b ~ host_c * mass_c + host_c * wing_c + (1 | ID) data_fem binomial
## AIC: 202.6123 202.6123
## (Intercept) coeff: -14.3528925 Pr(>|t|): 0.0001404174 *
## host_c coeff: -3.5218926 Pr(>|t|): 0.1015445
## mass_c coeff: -357.1011952 Pr(>|t|): 0.0004263504 *
## wing_c coeff: 8.6042524 Pr(>|t|): 0.002471981 *
## host_c:mass_c coeff: -189.3281934 Pr(>|t|): 0.01570222 *
## host_c:wing_c coeff: 6.7278037 Pr(>|t|): 0.01681285 *
# top model if use trial_type:
multi_glmer_fem2 <-glmer(flew_b~mass_c + wing_c + (1|trial_type), family=binomial, data=data_fem)
## boundary (singular) fit: see ?isSingular
# if you replace trial_type with ID then you'll see the coefficients are way too huge and at different scales. Why?
tidy_regression(multi_glmer_fem2, is_color=output_col)
## glmer flew_b ~ mass_c + wing_c + (1 | trial_type) data_fem binomial
## AIC: 244.0988 244.0988
## (Intercept) coeff: -0.8614654 Pr(>|t|): 2.941513e-07 *
## mass_c coeff: -38.2356966 Pr(>|t|): 3.640896e-06 *
## wing_c coeff: 0.8692786 Pr(>|t|): 7.874479e-05 *
R = data_fem$flew_b
A = data_fem$host_c
B = data_fem$wing_c
C = data_fem$mass_c
D = data_fem$eggs_b
data<-data.frame(R, A, B, C, D)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 4-FF.R")
## [,1] [,2] [,3]
## AICs 214.6841 214.9507 214.9984
## models 14 35 34
## probs 0.0668611 0.05851743 0.05713751
##
## m14 glm(formula = R ~ B + C + D, family = binomial, data = data)
## m35 glm(formula = R ~ A * C + B + D, family = binomial, data = data)
## m34 glm(formula = R ~ A * B + C + D, family = binomial, data = data)
egg_model2 <-glm(flew_b~wing_c + mass_c + eggs_b, family=binomial, data=data_fem)
tidy_regression(egg_model2, is_color=output_col)
## glm flew_b ~ wing_c + mass_c + eggs_b binomial data_fem
## AIC: 214.6841
## (Intercept) coeff: 0.360869 Pr(>|t|): 0.2011714
## wing_c coeff: 0.6411567 Pr(>|t|): 0.007192596 *
## mass_c coeff: -18.0024358 Pr(>|t|): 0.05131856 .
## eggs_b coeff: -1.9676327 Pr(>|t|): 1.88186e-07 *
egg_glmer <-glmer(flew_b~wing_c + mass_c + eggs_b + (1|ID), family=binomial, data=data_fem)
tidy_regression(egg_glmer, is_color=output_col) # model fits better
## glmer flew_b ~ wing_c + mass_c + eggs_b + (1 | ID) data_fem binomial
## AIC: 172.286 172.286
## (Intercept) coeff: 5.8613659 Pr(>|t|): 0.01023977 *
## wing_c coeff: 1.7946948 Pr(>|t|): 0.2609419
## mass_c coeff: -83.2034793 Pr(>|t|): 0.1559541
## eggs_b coeff: -15.9949936 Pr(>|t|): 7.729145e-08 *
still a strong negative effect if laid eggs that day but mass and wing length are no longer significant…
R = data_fem$flew_b
A = data_fem$host_c
B = data_fem$wing_c
C = data_fem$mass_c
D = data_fem$eggs_b
X = data_fem$ID
data<-data.frame(R, A, B, C, D, X)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-gaussian glmer 1-RF + 4-FF.R") # only 1 model failed to converge
## [,1] [,2] [,3] [,4]
## AICs 192.2485 195.0665 195.2866 195.2868
## models 53 21 86 33
## probs 0.4532078 0.1107595 0.09921775 0.09920911
##
## m53 R ~ B * C + C * D + (1 | X)
## m21 R ~ C * D + (1 | X)
## m86 R ~ B * C + B * D + C * D + (1 | X)
## m33 R ~ C * D + B + (1 | X)
anova(m21, m33, test="Chisq") # Adding B improves fits
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## m21: R ~ C * D + (1 | X)
## m33: R ~ C * D + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m21 6 192.24 212.38 -90.121 180.24
## m33 7 187.83 211.33 -86.916 173.83 6.4111 1 0.01134 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m33, m53, test="Chisq") # Adding B*C does not improve fit
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## m33: R ~ C * D + B + (1 | X)
## m53: R ~ B * C + C * D + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m33 7 187.83 211.33 -86.916 173.83
## m53 8 187.44 214.30 -85.722 171.44 2.3881 1 0.1223
egg_glmer2<-glmer(flew_b~mass_c *eggs_b + wing_c + (1|ID), family=binomial, data=data_fem)
tidy_regression(egg_glmer2, is_color=output_col) # model fits better
## glmer flew_b ~ mass_c * eggs_b + wing_c + (1 | ID) data_fem binomial
## AIC: 168.0517 168.0517
## (Intercept) coeff: -1.3549037 Pr(>|t|): 0.6132529
## mass_c coeff: -434.4925354 Pr(>|t|): 0.000529944 *
## eggs_b coeff: -9.2584415 Pr(>|t|): 0.0002528496 *
## wing_c coeff: 2.5051483 Pr(>|t|): 0.2992479
## mass_c:eggs_b coeff: 372.4847126 Pr(>|t|): 0.002918668 *
coefficients are extremely out of scale. * negative effect of mass * negative effect of eggs positive effect of wing positivie effect mass*eggs? doesn’t really make sense. –> collinearity
R = data_fem$flew_b
A = data_fem$thorax_c
B = data_fem$body_c
C = data_fem$wing_c
data<-data.frame(R, A, B, C)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 264.8145 264.9062 265.8203 266.1749 266.5372 266.6123
## models 16 15 4 5 3 17
## probs 0.1615492 0.1543053 0.09769975 0.08182416 0.06826939 0.06575111
## [,7] [,8]
## AICs 267.0072 267.0447
## models 2 10
## probs 0.05397086 0.05296782
##
## m16 glm(formula = R ~ A * C + B * C, family = binomial, data = data)
## m15 glm(formula = R ~ A * B + B * C, family = binomial, data = data)
## m4 glm(formula = R ~ A + B, family = binomial, data = data)
## m5 glm(formula = R ~ A + C, family = binomial, data = data)
## m3 glm(formula = R ~ C, family = binomial, data = data)
## m17 glm(formula = R ~ A * B + A * C + B * C, family = binomial, data = data)
## m2 glm(formula = R ~ B, family = binomial, data = data)
## m10 glm(formula = R ~ B * C, family = binomial, data = data)
anova(m13, m16, test="Chisq") # Adding A*C improves fit
## Analysis of Deviance Table
##
## Model 1: R ~ B * C + A
## Model 2: R ~ A * C + B * C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 207 257.16
## 2 206 252.81 1 4.3481 0.03705 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
morph_fem2 <- glm(flew_b ~ thorax_c * wing_c + body_c * wing_c, data=data_fem)
tidy_regression(morph_fem2, is_color=output_col)
## glm flew_b ~ thorax_c * wing_c + body_c * wing_c data_fem
## AIC: 286.161
## (Intercept) coeff: 0.3459579 Pr(>|t|): 7.445235e-15 *
## thorax_c coeff: -0.3650122 Pr(>|t|): 0.2063331
## wing_c coeff: 0.0121891 Pr(>|t|): 0.9523256
## body_c coeff: 0.1402803 Pr(>|t|): 0.4768686
## thorax_c:wing_c coeff: 0.1125674 Pr(>|t|): 0.6577591
## wing_c:body_c coeff: -0.0443685 Pr(>|t|): 0.4358603
morph_fem_glmer <- glmer(flew_b ~ thorax_c * wing_c + body_c * wing_c + (1|ID), data=data_fem, family=binomial)
tidy_regression(morph_fem_glmer, is_color=output_col) #even better fitted model; effects no longer significant and coefficients changed.
## glmer flew_b ~ thorax_c * wing_c + body_c * wing_c + (1 | ID) data_fem binomial
## AIC: 237.6908 237.6908
## (Intercept) coeff: -6.3578121 Pr(>|t|): 5.347574e-05 *
## thorax_c coeff: -7.0364568 Pr(>|t|): 0.374903
## wing_c coeff: 0.1961325 Pr(>|t|): 0.9568734
## body_c coeff: 2.4508614 Pr(>|t|): 0.5362512
## thorax_c:wing_c coeff: 8.012155 Pr(>|t|): 0.4452285
## wing_c:body_c coeff: -2.6935872 Pr(>|t|): 0.4002898
Trying wing2body
R = data_fem$flew_b
A = data_fem$thorax_c
B = data_fem$wing2body_c
data<-data.frame(R, A, B)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 2-FF.R")
## [,1] [,2] [,3] [,4] [,5]
## AICs 267.9101 268.2296 269.6979 270.8674 270.9484
## models 2 4 3 1 5
## probs 0.3692413 0.314723 0.1510409 0.08416619 0.08082861
##
## m2 glm(formula = R ~ B, family = binomial, data = data)
## m4 glm(formula = R ~ A * B, family = binomial, data = data)
## m3 glm(formula = R ~ A + B, family = binomial, data = data)
## m1 glm(formula = R ~ A, family = binomial, data = data)
## m0 glm(formula = R ~ 1, family = binomial, data = data)
anova(m0, m2, test="Chisq") # Adding B improves fit
## Analysis of Deviance Table
##
## Model 1: R ~ 1
## Model 2: R ~ B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 211 268.95
## 2 210 263.91 1 5.0382 0.02479 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m2, m3, test="Chisq") # Adding A does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ B
## Model 2: R ~ A + B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 210 263.91
## 2 209 263.70 1 0.2122 0.645
morph_fem3 <- glm(flew_b ~ wing2body_c, data=data_fem, family=binomial) # variance of population and days_from_start_c = 0
tidy_regression(morph_fem3, is_color=output_col)
## glm flew_b ~ wing2body_c binomial data_fem
## AIC: 267.9101
## (Intercept) coeff: -0.7286364 Pr(>|t|): 1.010644e-06 *
## wing2body_c coeff: 19.4519307 Pr(>|t|): 0.03183424 *
Worth running a glmer() script.
R = data_fem$flew_b
A = data_fem$thorax_c
B = data_fem$body_c
C = data_fem$wing_c
X = data_fem$trial_type # you get a different set of top models if you use trial_type or ID, you get the null model if ID and the usual model if trial_type
data<-data.frame(R, A, B, C, X)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glmer 1 RF + 3-FF.R")
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 266.442 266.563 267.499 267.9407 268.2505 268.277
## models 16 15 4 5 17 3
## probs 0.1623081 0.1527796 0.09568086 0.07671927 0.06570907 0.06484419
## [,7] [,8]
## AICs 268.7106 268.7662
## models 2 10
## probs 0.05220543 0.05077523
##
## m16 R ~ A * C + B * C + (1 | X)
## m15 R ~ A * B + B * C + (1 | X)
## m4 R ~ A + B + (1 | X)
## m5 R ~ A + C + (1 | X)
## m17 R ~ A * B + A * C + B * C + (1 | X)
## m3 R ~ C + (1 | X)
## m2 R ~ B + (1 | X)
## m10 R ~ B * C + (1 | X)
anova(m0, m3, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 2 272.72 279.43 -134.36 268.72
## m3 3 268.28 278.35 -131.14 262.28 6.4406 1 0.01115 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m0, m2, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 2 272.72 279.43 -134.36 268.72
## m2 3 268.71 278.78 -131.35 262.71 6.007 1 0.01425 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m0, m1, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 2 272.72 279.43 -134.36 268.72
## m1 3 272.61 282.68 -133.30 266.61 2.1097 1 0.1464
# when use ID and trial type as RFs get the null model as the top model
morph_fem_glmer3 <- glmer(flew_b ~ 1 + (1|ID), data=data_fem, family=binomial)
tidy_regression(morph_fem_glmer3 , is_color=output_col)
## glmer flew_b ~ 1 + (1 | ID) data_fem binomial
## AIC: 230.106 230.106
## 1 coeff: -7.3268741 Pr(>|t|): 2.291958e-11 *
# when use only trial type as RF:
morph_fem_glmer4 <- glmer(flew_b ~ thorax_c * wing_c + body_c * wing_c + (1|trial_type), data=data_fem, family=binomial)
tidy_regression(morph_fem_glmer4 , is_color=output_col)
## glmer flew_b ~ thorax_c * wing_c + body_c * wing_c + (1 | trial_type) data_fem binomial
## AIC: 266.442 266.442
## (Intercept) coeff: -0.5761341 Pr(>|t|): 0.01623514 *
## thorax_c coeff: -3.7064606 Pr(>|t|): 0.03308087 *
## wing_c coeff: 0.11423 Pr(>|t|): 0.9051479
## body_c coeff: 1.2869096 Pr(>|t|): 0.1925277
## thorax_c:wing_c coeff: 4.3295685 Pr(>|t|): 0.05106715 .
## wing_c:body_c coeff: -1.471794 Pr(>|t|): 0.03012126 *
Trying wing2body
R = data_fem$flew_b
A = data_fem$thorax_c
B = data_fem$wing2body_c
X = data_fem$trial_type # you get a different set of top models if you use trial_type or ID
data<-data.frame(R, A, B, X)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glmer 1-RF + 2-FF.R")
## [,1] [,2] [,3] [,4] [,5]
## AICs 269.7551 270.0063 271.5272 272.608 272.7176
## models 2 4 3 1 5
## probs 0.3620922 0.319347 0.1492806 0.08696059 0.0823197
##
## m2 R ~ B + (1 | X)
## m4 R ~ A * B + (1 | X)
## m3 R ~ A + B + (1 | X)
## m1 R ~ A + (1 | X)
## m0 R ~ 1 + (1 | X)
anova(m0, m2, test="Chisq") # Adding A marginally imprvoes fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 2 272.72 279.43 -134.36 268.72
## m2 3 269.75 279.82 -131.88 263.75 4.9626 1 0.0259 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m0, m1, test="Chisq")
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 2 272.72 279.43 -134.36 268.72
## m1 3 272.61 282.68 -133.30 266.61 2.1097 1 0.1464
# best fit model if only use trial type as RF:
morph_fem_glmer5 <- glmer(flew_b ~ wing2body_c + (1|trial_type), data=data_fem, family=binomial)
tidy_regression(morph_fem_glmer5 , is_color=output_col)
## glmer flew_b ~ wing2body_c + (1 | trial_type) data_fem binomial
## AIC: 269.7551 269.7551
## (Intercept) coeff: -0.7387311 Pr(>|t|): 4.812048e-05 *
## wing2body_c coeff: 19.3571342 Pr(>|t|): 0.0329741 *
# best fit model is the null if use both trial type and ID as RFs:
morph_fem_glmer6 <- glmer(flew_b ~ 1 + (1|ID), data=data_fem, family=binomial)
tidy_regression(morph_fem_glmer6, is_color=output_col)
## glmer flew_b ~ 1 + (1 | ID) data_fem binomial
## AIC: 230.106 230.106
## 1 coeff: -7.3268741 Pr(>|t|): 2.291958e-11 *
nomass_fem2 <-glm(flew_b~wing_c, family=binomial, data=data_fem)
tidy_regression(nomass_fem2, is_color=output_col)
## glm flew_b ~ wing_c binomial data_fem
## AIC: 266.5372
## (Intercept) coeff: -0.7338294 Pr(>|t|): 9.640784e-07 *
## wing_c coeff: 0.4423614 Pr(>|t|): 0.01462542 *
tidy_regression(mass_fem2, is_color=output_col)
## glm flew_b ~ wing_c + mass_c binomial data_fem
## AIC: 242.0988
## (Intercept) coeff: -0.8614654 Pr(>|t|): 2.942928e-07 *
## wing_c coeff: 0.8692786 Pr(>|t|): 7.878961e-05 *
## mass_c coeff: -38.2356966 Pr(>|t|): 3.658768e-06 *
tidy_regression(multi_glmer_fem, is_color=output_col)
## glmer flew_b ~ host_c * mass_c + host_c * wing_c + (1 | ID) data_fem binomial
## AIC: 202.6123 202.6123
## (Intercept) coeff: -14.3528925 Pr(>|t|): 0.0001404174 *
## host_c coeff: -3.5218926 Pr(>|t|): 0.1015445
## mass_c coeff: -357.1011952 Pr(>|t|): 0.0004263504 *
## wing_c coeff: 8.6042524 Pr(>|t|): 0.002471981 *
## host_c:mass_c coeff: -189.3281934 Pr(>|t|): 0.01570222 *
## host_c:wing_c coeff: 6.7278037 Pr(>|t|): 0.01681285 *
tidy_regression(multi_glmer_fem2, is_color=output_col)
## glmer flew_b ~ mass_c + wing_c + (1 | trial_type) data_fem binomial
## AIC: 244.0988 244.0988
## (Intercept) coeff: -0.8614654 Pr(>|t|): 2.941513e-07 *
## mass_c coeff: -38.2356966 Pr(>|t|): 3.640896e-06 *
## wing_c coeff: 0.8692786 Pr(>|t|): 7.874479e-05 *
tidy_regression(egg_model2, is_color=output_col)
## glm flew_b ~ wing_c + mass_c + eggs_b binomial data_fem
## AIC: 214.6841
## (Intercept) coeff: 0.360869 Pr(>|t|): 0.2011714
## wing_c coeff: 0.6411567 Pr(>|t|): 0.007192596 *
## mass_c coeff: -18.0024358 Pr(>|t|): 0.05131856 .
## eggs_b coeff: -1.9676327 Pr(>|t|): 1.88186e-07 *
tidy_regression(egg_glmer, is_color=output_col)
## glmer flew_b ~ wing_c + mass_c + eggs_b + (1 | ID) data_fem binomial
## AIC: 172.286 172.286
## (Intercept) coeff: 5.8613659 Pr(>|t|): 0.01023977 *
## wing_c coeff: 1.7946948 Pr(>|t|): 0.2609419
## mass_c coeff: -83.2034793 Pr(>|t|): 0.1559541
## eggs_b coeff: -15.9949936 Pr(>|t|): 7.729145e-08 *
tidy_regression(egg_glmer2, is_color=output_col)
## glmer flew_b ~ mass_c * eggs_b + wing_c + (1 | ID) data_fem binomial
## AIC: 168.0517 168.0517
## (Intercept) coeff: -1.3549037 Pr(>|t|): 0.6132529
## mass_c coeff: -434.4925354 Pr(>|t|): 0.000529944 *
## eggs_b coeff: -9.2584415 Pr(>|t|): 0.0002528496 *
## wing_c coeff: 2.5051483 Pr(>|t|): 0.2992479
## mass_c:eggs_b coeff: 372.4847126 Pr(>|t|): 0.002918668 *
All the morph models below had an issue between using ID vs. trial type as the RF. When I used trial_type I got reasonable results. When I used ID I always got the null model. Why is this?
tidy_regression(morph_fem2, is_color=output_col)
## glm flew_b ~ thorax_c * wing_c + body_c * wing_c data_fem
## AIC: 286.161
## (Intercept) coeff: 0.3459579 Pr(>|t|): 7.445235e-15 *
## thorax_c coeff: -0.3650122 Pr(>|t|): 0.2063331
## wing_c coeff: 0.0121891 Pr(>|t|): 0.9523256
## body_c coeff: 0.1402803 Pr(>|t|): 0.4768686
## thorax_c:wing_c coeff: 0.1125674 Pr(>|t|): 0.6577591
## wing_c:body_c coeff: -0.0443685 Pr(>|t|): 0.4358603
tidy_regression(morph_fem_glmer, is_color=output_col)
## glmer flew_b ~ thorax_c * wing_c + body_c * wing_c + (1 | ID) data_fem binomial
## AIC: 237.6908 237.6908
## (Intercept) coeff: -6.3578121 Pr(>|t|): 5.347574e-05 *
## thorax_c coeff: -7.0364568 Pr(>|t|): 0.374903
## wing_c coeff: 0.1961325 Pr(>|t|): 0.9568734
## body_c coeff: 2.4508614 Pr(>|t|): 0.5362512
## thorax_c:wing_c coeff: 8.012155 Pr(>|t|): 0.4452285
## wing_c:body_c coeff: -2.6935872 Pr(>|t|): 0.4002898
tidy_regression(morph_fem3, is_color=output_col)
## glm flew_b ~ wing2body_c binomial data_fem
## AIC: 267.9101
## (Intercept) coeff: -0.7286364 Pr(>|t|): 1.010644e-06 *
## wing2body_c coeff: 19.4519307 Pr(>|t|): 0.03183424 *
tidy_regression(morph_fem_glmer3 , is_color=output_col)
## glmer flew_b ~ 1 + (1 | ID) data_fem binomial
## AIC: 230.106 230.106
## 1 coeff: -7.3268741 Pr(>|t|): 2.291958e-11 *
tidy_regression(morph_fem_glmer4 , is_color=output_col)
## glmer flew_b ~ thorax_c * wing_c + body_c * wing_c + (1 | trial_type) data_fem binomial
## AIC: 266.442 266.442
## (Intercept) coeff: -0.5761341 Pr(>|t|): 0.01623514 *
## thorax_c coeff: -3.7064606 Pr(>|t|): 0.03308087 *
## wing_c coeff: 0.11423 Pr(>|t|): 0.9051479
## body_c coeff: 1.2869096 Pr(>|t|): 0.1925277
## thorax_c:wing_c coeff: 4.3295685 Pr(>|t|): 0.05106715 .
## wing_c:body_c coeff: -1.471794 Pr(>|t|): 0.03012126 *
tidy_regression(morph_fem_glmer5 , is_color=output_col)
## glmer flew_b ~ wing2body_c + (1 | trial_type) data_fem binomial
## AIC: 269.7551 269.7551
## (Intercept) coeff: -0.7387311 Pr(>|t|): 4.812048e-05 *
## wing2body_c coeff: 19.3571342 Pr(>|t|): 0.0329741 *
tidy_regression(morph_fem_glmer6, is_color=output_col)
## glmer flew_b ~ 1 + (1 | ID) data_fem binomial
## AIC: 230.106 230.106
## 1 coeff: -7.3268741 Pr(>|t|): 2.291958e-11 *
Cleaning the Data
rm(list=ls())
output_col = FALSE
source("src/clean_flight_data.R")
source("src/regression_output.R")
source("src/center_flight_data.R")
source("get_warnings.R")
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
d <- data_tested %>%
group_by(ID, sex,population, site, host_plant, latitude, longitude, total_eggs,
beak, thorax, wing, body, w_morph, morph_notes, tested,
host_c, sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c,
beak_c, thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c, wing2body_s) %>%
summarise_all(funs(list(na.omit(.))))
d$num_flew <- 0
d$num_notflew <- 0
d$average_mass <- 0
for(row in 1:length(d$flew_b)){
n_flew <- sum(d$flew_b[[row]] == 1) # total number of times did fly among trails
d$num_flew[[row]] <- n_flew
n_notflew <- sum(d$flew_b[[row]] == 0) # total number of times did not fly among trails
d$num_notflew[[row]] <- n_notflew
avg_mass <- mean(d$mass[[row]])
d$average_mass[[row]] <- avg_mass
}
d <- select(d, -filename, -channel_letter, -set_number)
data_male <- d[d$sex=="M",]
data_male <- center_data(data_male, is_not_binded = FALSE)
data_male
## # A tibble: 214 x 63
## # Groups: ID, sex, population, site, host_plant, latitude, longitude,
## # total_eggs, beak, thorax, wing, body, w_morph, morph_notes, tested, host_c,
## # sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, beak_c,
## # thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c [214]
## ID sex population site host_plant latitude longitude total_eggs beak
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 1 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.5
## 2 2 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.64
## 3 3 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.75
## 4 4 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 6.21
## 5 6 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 5.76
## 6 9 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 6.1
## 7 10 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 6.09
## 8 11 M Key Largo KLMRL C. corind… 25.1 -80.4 NA 5.88
## 9 14 M Key Largo KLMRL C. corind… 25.1 -80.4 NA 5.25
## 10 15 M Key Largo JP C. corind… 25.1 -80.4 NA 6.53
## # … with 204 more rows, and 54 more variables: thorax <dbl>, wing <dbl>,
## # body <dbl>, w_morph <fct>, morph_notes <fct>, tested <fct>, host_c <dbl>,
## # sex_c <dbl>, w_morph_c <dbl>, lat_c <dbl>, sym_dist <dbl>,
## # sym_dist_s <dbl>, total_eggs_c <dbl>, beak_c <dbl>, thorax_c <dbl>,
## # thorax_s <dbl>, body_c <dbl>, wing_c <dbl>, wing2body <dbl>,
## # wing2body_c <dbl>, wing2body_s <dbl>, trial_type <list>, chamber <list>,
## # average_speed <list>, total_flight_time <list>, distance <list>,
## # shortest_flying_bout <list>, longest_flying_bout <list>,
## # total_duration <list>, max_speed <list>, test_date <list>,
## # time_start <list>, time_end <list>, flew <list>, flight_type <list>,
## # mass <list>, EWM <list>, flew_b <list>, eggs_b <list>,
## # minute_duration <list>, minute_duration_c <list>, min_from_IncStart <dbl>,
## # min_from_IncStart_c <dbl>, min_from_IncStart_s <dbl>,
## # days_from_start <list>, days_from_start_c <list>, mass_c <list>,
## # mass_s <list>, average_mass <dbl>, average_mass_c <dbl>,
## # average_mass_s <dbl>, trial_type_og <list>, num_flew <dbl>,
## # num_notflew <dbl>
Experimental, Biological, and Morphological Effects
## glm cbind(num_flew, num_notflew) ~ average_mass binomial data_male
## AIC: 440.8166
## (Intercept) coeff: 0.6948251 Pr(>|t|): 0.2717353
## average_mass coeff: 1.7230351 Pr(>|t|): 0.9146541
## glm cbind(num_flew, num_notflew) ~ beak_c binomial data_male
## AIC: 438.5285
## (Intercept) coeff: 0.7624726 Pr(>|t|): 1.555005e-12 *
## beak_c coeff: 0.4003301 Pr(>|t|): 0.1339901
## glm cbind(num_flew, num_notflew) ~ thorax_c binomial data_male
## AIC: 440.6291
## (Intercept) coeff: 0.7618644 Pr(>|t|): 1.376158e-12 *
## thorax_c coeff: 0.237372 Pr(>|t|): 0.6555153
## glm cbind(num_flew, num_notflew) ~ body_c binomial data_male
## AIC: 436.6678
## (Intercept) coeff: 0.7653895 Pr(>|t|): 1.502199e-12 *
## body_c coeff: 0.3148292 Pr(>|t|): 0.04156653 *
## glm cbind(num_flew, num_notflew) ~ wing_c binomial data_male
## AIC: 434.1313
## (Intercept) coeff: 0.7693616 Pr(>|t|): 1.439954e-12 *
## wing_c coeff: 0.4672408 Pr(>|t|): 0.009983293 *
R1 = data_male$num_flew
R2 = data_male$num_notflew
A = data_male$host_c
B = data_male$wing_c
C = data_male$average_mass
X = data_male$population # can't do trial type anymore
data<-data.frame(R1, R2, A, B, C, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 2-FF.R"))
## [,1] [,2] [,3]
## AICs 429.694 431.3445 432.2158
## models 2 3 4
## probs 0.545637 0.2390621 0.1546389
##
## m2 cbind(R1, R2) ~ B + (1 | X)
## m3 cbind(R1, R2) ~ A + B + (1 | X)
## m4 cbind(R1, R2) ~ A * B + (1 | X)
length(errors$warnings)
## [1] 0
anova(m2, m3, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m3: cbind(R1, R2) ~ A + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 3 429.69 439.79 -211.85 423.69
## m3 4 431.34 444.81 -211.67 423.34 0.3495 1 0.5544
nomass_male <- glmer(cbind(num_flew, num_notflew) ~ wing_c + (1|population), data=data_male, family=binomial)
tidy_regression(nomass_male, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing_c + (1 | population) data_male binomial
## AIC: 429.694 429.694
## (Intercept) coeff: 0.6501987 Pr(>|t|): 0.001137608 *
## wing_c coeff: 0.5219421 Pr(>|t|): 0.007476693 *
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 425.2395 425.3462 426.4034 426.4553 426.5348 426.8988 427.3874
## models 10 6 7 13 17 14 16
## probs 0.1864691 0.1767786 0.1042012 0.1015297 0.09757337 0.0813386 0.06370849
## [,8]
## AICs 427.6089
## models 11
## probs 0.0570286
##
## m10 cbind(R1, R2) ~ B * C + (1 | X)
## m6 cbind(R1, R2) ~ B + C + (1 | X)
## m7 cbind(R1, R2) ~ A + B + C + (1 | X)
## m13 cbind(R1, R2) ~ B * C + A + (1 | X)
## m17 cbind(R1, R2) ~ A * B + A * C + B * C + (1 | X)
## m14 cbind(R1, R2) ~ A * B + A * C + (1 | X)
## m16 cbind(R1, R2) ~ A * C + B * C + (1 | X)
## m11 cbind(R1, R2) ~ A * B + C + (1 | X)
length(errors$warnings)
## [1] 0
anova(m6, m10, test="Chisq") # Adding B*C does not improve fit
## Data: data
## Models:
## m6: cbind(R1, R2) ~ B + C + (1 | X)
## m10: cbind(R1, R2) ~ B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m6 4 425.35 438.81 -208.67 417.35
## m10 5 425.24 442.07 -207.62 415.24 2.1067 1 0.1467
anova(m6, m7, test="Chisq") # Adding A does not improve fot
## Data: data
## Models:
## m6: cbind(R1, R2) ~ B + C + (1 | X)
## m7: cbind(R1, R2) ~ A + B + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m6 4 425.35 438.81 -208.67 417.35
## m7 5 426.40 443.23 -208.20 416.40 0.9428 1 0.3315
mass_male <- glmer(cbind(num_flew, num_notflew) ~ wing_c + average_mass + (1|population), data=data_male, family=binomial)
tidy_regression(mass_male, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing_c + average_mass + (1 | population) data_male binomial
## AIC: 425.3462 425.3462
## (Intercept) coeff: 2.8185733 Pr(>|t|): 0.001597233 *
## wing_c coeff: 0.9611493 Pr(>|t|): 0.00032735 *
## average_mass coeff: -56.8399391 Pr(>|t|): 0.01224168 *
# check for collinearity between mass and wing length
data<-data.frame(data_male$host_c, data_male$wing_c, data_male$average_mass)
colnames(data) <- c("Host Plant", "Wing Length", "Average Mass")
pairs(data)
d <- data_male %>%
filter(!is.na(body))
d$thorax_c <- d$thorax - mean(d$thorax)
d$wing_c <- d$wing - mean(d$wing)
d$body_c <- d$body - mean(d$body)
R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$body_c
C = d$wing_c
X = d$population
data<-data.frame(R1, R2, A, B, C, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 3-FF.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 417.7065 418.3341 419.3638 419.4253 419.6429 420.7335 420.7853
## models 13 10 9 16 15 12 11
## probs 0.2603437 0.1902239 0.1136751 0.1102345 0.0988691 0.05730978 0.05584524
##
## m13 cbind(R1, R2) ~ B * C + A + (1 | X)
## m10 cbind(R1, R2) ~ B * C + (1 | X)
## m9 cbind(R1, R2) ~ A * C + (1 | X)
## m16 cbind(R1, R2) ~ A * C + B * C + (1 | X)
## m15 cbind(R1, R2) ~ A * B + B * C + (1 | X)
## m12 cbind(R1, R2) ~ A * C + B + (1 | X)
## m11 cbind(R1, R2) ~ A * B + C + (1 | X)
length(errors$warnings)
## [1] 0
anova(m13, m16, test="Chisq") # Adding B*C does not improve fit
## Data: data
## Models:
## m13: cbind(R1, R2) ~ B * C + A + (1 | X)
## m16: cbind(R1, R2) ~ A * C + B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m13 6 417.71 437.90 -202.85 405.71
## m16 7 419.43 442.99 -202.71 405.43 0.2812 1 0.5959
anova(m10, m13, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m10: cbind(R1, R2) ~ B * C + (1 | X)
## m13: cbind(R1, R2) ~ B * C + A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m10 5 418.33 435.16 -204.17 408.33
## m13 6 417.71 437.90 -202.85 405.71 2.6276 1 0.105
morph_male <- glmer(cbind(num_flew, num_notflew) ~ body_c * wing_c + thorax_c + (1 | population), data=d, family=binomial)
tidy_regression(morph_male, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ body_c * wing_c + thorax_c + (1 | population) d binomial
## AIC: 417.7065 417.7065
## (Intercept) coeff: 0.9005518 Pr(>|t|): 8.927614e-05 *
## body_c coeff: -0.5749452 Pr(>|t|): 0.4359261
## wing_c coeff: 1.4278926 Pr(>|t|): 0.0544712 .
## thorax_c coeff: -1.8911886 Pr(>|t|): 0.106149
## body_c:wing_c coeff: -0.6554036 Pr(>|t|): 0.004827235 *
R1 = d$num_flew
R2 = d$num_notflew
A = d$thorax_c
B = d$wing2body_c
X = d$population
data<-data.frame(R1, R2, A, B, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 2R ~ 1 RF + 2-FF.R"))
## [,1] [,2] [,3]
## AICs 424.3553 425.5079 426.0182
## models 2 4 3
## probs 0.4988961 0.2803679 0.2172206
##
## m2 cbind(R1, R2) ~ B + (1 | X)
## m4 cbind(R1, R2) ~ A * B + (1 | X)
## m3 cbind(R1, R2) ~ A + B + (1 | X)
length(errors$warnings)
## [1] 0
anova(m2, m3, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m2: cbind(R1, R2) ~ B + (1 | X)
## m3: cbind(R1, R2) ~ A + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 3 424.36 434.45 -209.18 418.36
## m3 4 426.02 439.48 -209.01 418.02 0.337 1 0.5615
morph_male2 <- glmer(cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population), data=d, family=binomial)
tidy_regression(morph_male2, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population) d binomial
## AIC: 424.3553 424.3553
## (Intercept) coeff: 0.6428367 Pr(>|t|): 0.004005262 *
## wing2body_c coeff: 24.3101404 Pr(>|t|): 0.00056135 *
# check for collinearity between mass and morhpology
data<-data.frame(data_male$thorax_c, data_male$wing_c, data_male$body_c, data_male$average_mass)
colnames(data) <- c("Thorax Length", "Wing Length", "Body Length", "Average Mass")
pairs(data)
data_male <- data_tested[data_tested$sex=="M",]
data_male <- center_data(data_male)
data<-data.frame(data_male$thorax_c, data_male$wing_c, data_male$body_c, data_male$mass_c, data_male$days_from_start_c)
colnames(data) <- c("Thorax Length", "Wing Length", "Body Length", "Average Mass", "Days from Start")
pairs(data)
m <- lm(mass ~ days_from_start, data=data_male)
summary(m)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0365303349 6.852946e-04 53.306032 4.856096e-183
## days_from_start 0.0001970102 5.128275e-05 3.841646 1.422310e-04
plot(mass ~ days_from_start, data=data_male)
abline(m, col="red")
#plot(body ~ days_from_start, data=data_male)
text(mass ~ days_from_start, labels=data_male$ID, data=data_male, cex=0.5, font=2, pos=4) # maybe the smaller males died faster? that's why it looks like.
tidy_regression(nomass_male, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing_c + (1 | population) data_male binomial
## AIC: 429.694 429.694
## (Intercept) coeff: 0.6501987 Pr(>|t|): 0.001137608 *
## wing_c coeff: 0.5219421 Pr(>|t|): 0.007476693 *
tidy_regression(mass_male, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing_c + average_mass + (1 | population) data_male binomial
## AIC: 425.3462 425.3462
## (Intercept) coeff: 2.8185733 Pr(>|t|): 0.001597233 *
## wing_c coeff: 0.9611493 Pr(>|t|): 0.00032735 *
## average_mass coeff: -56.8399391 Pr(>|t|): 0.01224168 *
tidy_regression(morph_male, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ body_c * wing_c + thorax_c + (1 | population) d binomial
## AIC: 417.7065 417.7065
## (Intercept) coeff: 0.9005518 Pr(>|t|): 8.927614e-05 *
## body_c coeff: -0.5749452 Pr(>|t|): 0.4359261
## wing_c coeff: 1.4278926 Pr(>|t|): 0.0544712 .
## thorax_c coeff: -1.8911886 Pr(>|t|): 0.106149
## body_c:wing_c coeff: -0.6554036 Pr(>|t|): 0.004827235 *
tidy_regression(morph_male2, is_color=output_col)
## glmer cbind(num_flew, num_notflew) ~ wing2body_c + (1 | population) d binomial
## AIC: 424.3553 424.3553
## (Intercept) coeff: 0.6428367 Pr(>|t|): 0.004005262 *
## wing2body_c coeff: 24.3101404 Pr(>|t|): 0.00056135 *
Without Mass Modeling + Days From Start
With Mass Modeling + Days From Start
glmer() A=thorax, B=body, C=wing, D=days_from_start, X=ID, Y=trial_type
glmer() A=wing2body, B=thorax, C=days_from_start, X=ID, Y=trial_type
Cleaning Data
rm(list=ls())
output_col = FALSE
source("src/clean_flight_data.R")
source("src/regression_output.R")
source("src/center_flight_data.R")
source("get_warnings.R")
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
data_male <- data_tested[data_tested$sex=="M",]
data_male <- center_data(data_male)
Experimental Set-Up Effects
## glm flew_b ~ chamber binomial data_male
## AIC: 509.4223
## (Intercept) coeff: 0.6190392 Pr(>|t|): 0.06184483 .
## chamberA-2 coeff: 0.238411 Pr(>|t|): 0.604278
## chamberA-3 coeff: 0.36179 Pr(>|t|): 0.4203252
## chamberA-4 coeff: -0.1010961 Pr(>|t|): 0.8045408
## chamberB-1 coeff: 0.6131045 Pr(>|t|): 0.2585014
## chamberB-2 coeff: 0.4795731 Pr(>|t|): 0.2820852
## chamberB-3 coeff: -0.0510552 Pr(>|t|): 0.9095683
## chamberB-4 coeff: -0.1564157 Pr(>|t|): 0.7302194
## glm flew_b ~ days_from_start_c binomial data_male
## AIC: 495.9476
## (Intercept) coeff: 0.7773817 Pr(>|t|): 1.038357e-12 *
## days_from_start_c coeff: -0.0421748 Pr(>|t|): 0.007751326 *
## glm flew_b ~ min_from_IncStart binomial data_male
## AIC: 500.0813
## (Intercept) coeff: 0.5248177 Pr(>|t|): 0.002036133 *
## min_from_IncStart coeff: 0.0014157 Pr(>|t|): 0.08099961 .
Biological Effects
## glm flew_b ~ mass_c binomial data_male
## AIC: 502.0346
## (Intercept) coeff: 0.7640292 Pr(>|t|): 1.316748e-12 *
## mass_c coeff: -16.026149 Pr(>|t|): 0.2777163
Morphology Effects
## glm flew_b ~ beak_c binomial data_male
## AIC: 500.9118
## (Intercept) coeff: 0.7667136 Pr(>|t|): 1.252154e-12 *
## beak_c coeff: 0.4003301 Pr(>|t|): 0.1339899
## glm flew_b ~ thorax_c binomial data_male
## AIC: 503.0124
## (Intercept) coeff: 0.7620312 Pr(>|t|): 1.36459e-12 *
## thorax_c coeff: 0.237372 Pr(>|t|): 0.6555153
## glm flew_b ~ body_c binomial data_male
## AIC: 499.051
## (Intercept) coeff: 0.7698475 Pr(>|t|): 1.190066e-12 *
## body_c coeff: 0.3148292 Pr(>|t|): 0.04156646 *
## glm flew_b ~ wing_c binomial data_male
## AIC: 496.5145
## (Intercept) coeff: 0.7745228 Pr(>|t|): 1.100469e-12 *
## wing_c coeff: 0.4672408 Pr(>|t|): 0.009983264 *
# Remove any missing masses
data_male <- data_male %>%
filter(!is.na(mass), !is.na(wing))
data_male <- center_data(data_male)
R = data_male$flew_b
A = data_male$host_c
B = data_male$days_from_start_c
C = data_male$wing_c
D = data_male$mass_c
data<-data.frame(R, A, B, C, D)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 488.1151 488.5811 489.4843 489.5464 489.7248 489.726
## models 12 7 16 14 11 13
## probs 0.2034347 0.1611523 0.1025948 0.09945603 0.09096803 0.09091233
## [,7]
## AICs 490.2494
## models 6
## probs 0.06998184
##
## m12 glm(formula = R ~ A * C + B, family = binomial, data = data)
## m7 glm(formula = R ~ A + B + C, family = binomial, data = data)
## m16 glm(formula = R ~ A * C + B * C, family = binomial, data = data)
## m14 glm(formula = R ~ A * B + A * C, family = binomial, data = data)
## m11 glm(formula = R ~ A * B + C, family = binomial, data = data)
## m13 glm(formula = R ~ B * C + A, family = binomial, data = data)
## m6 glm(formula = R ~ B + C, family = binomial, data = data)
anova(m7, m12, test="Chisq") # Adding A*C does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A + B + C
## Model 2: R ~ A * C + B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 395 480.58
## 2 394 478.12 1 2.466 0.1163
anova(m6, m7, test="Chisq") # Adding A improves fit
## Analysis of Deviance Table
##
## Model 1: R ~ B + C
## Model 2: R ~ A + B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 396 484.25
## 2 395 480.58 1 3.6682 0.05546 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## glm flew_b ~ host_c + days_from_start_c + wing_c binomial data_male
## AIC: 488.5811
## (Intercept) coeff: 0.6820696 Pr(>|t|): 3.333341e-08 *
## host_c coeff: -0.2383709 Pr(>|t|): 0.05382225 .
## days_from_start_c coeff: -0.0482276 Pr(>|t|): 0.002981107 *
## wing_c coeff: 0.4764872 Pr(>|t|): 0.00973476 *
## Consider covariates
model_male_final <-glmer(flew_b~host_c + days_from_start_c + wing_c + (1|ID), family=binomial, data=data_male)
tidy_regression(model_male_final, is_color=output_col)
## glmer flew_b ~ host_c + days_from_start_c + wing_c + (1 | ID) data_male binomial
## AIC: 465.5099 465.5099
## (Intercept) coeff: 1.1655976 Pr(>|t|): 9.228989e-05 *
## host_c coeff: -0.4450102 Pr(>|t|): 0.07884675 .
## days_from_start_c coeff: -0.0806044 Pr(>|t|): 0.0009388736 *
## wing_c coeff: 0.7759874 Pr(>|t|): 0.03696311 *
## Changed the effect slightly and improved the model
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 4-FF.R")
## [,1] [,2]
## AICs 479.0927 479.4123
## models 15 35
## probs 0.06153771 0.05244876
##
## m15 glm(formula = R ~ A + B + C + D, family = binomial, data = data)
## m35 glm(formula = R ~ A * C + B + D, family = binomial, data = data)
anova(m15, m35, test="Chisq") # Adding A*C does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A + B + C + D
## Model 2: R ~ A * C + B + D
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 394 469.09
## 2 393 467.41 1 1.6804 0.1949
## glm flew_b ~ host_c + mass_c + days_from_start_c + wing_c binomial data_male
## AIC: 479.0927
## (Intercept) coeff: 0.6597987 Pr(>|t|): 1.555244e-07 *
## host_c coeff: -0.3281229 Pr(>|t|): 0.01127145 *
## mass_c coeff: -68.3341293 Pr(>|t|): 0.0008482356 *
## days_from_start_c coeff: -0.0393589 Pr(>|t|): 0.01815356 *
## wing_c coeff: 0.9794922 Pr(>|t|): 6.066412e-05 *
## Consider covariates
mass_male_glmer <-glmer(flew_b~host_c + mass_c + days_from_start_c + wing_c + (1|population) + (1|trial_type), family=binomial, data=data_male) # no error, but singular fit error
## boundary (singular) fit: see ?isSingular
tidy_regression(mass_male_glmer, is_color=output_col)
## glmer flew_b ~ host_c + mass_c + days_from_start_c + wing_c + (1 | population) + (1 | trial_type) data_male binomial
## AIC: 478.8491 478.8491
## (Intercept) coeff: 0.6241034 Pr(>|t|): 0.001564901 *
## host_c coeff: -0.2482033 Pr(>|t|): 0.219619
## mass_c coeff: -69.9405104 Pr(>|t|): 0.0007512309 *
## days_from_start_c coeff: -0.0405234 Pr(>|t|): 0.01652003 *
## wing_c coeff: 1.0751122 Pr(>|t|): 3.758597e-05 *
## Changed the effects slightly, a better fitting model, and made host not significant
data_male <- data_tested[data_tested$sex=="M",]
data_male <- data_male %>%
filter(!is.na(mass)) %>%
filter(!is.na(body))
data_male <- center_data(data_male)
R = data_male$flew_b
A = data_male$thorax_c
B = data_male$body_c
C = data_male$wing_c
data<-data.frame(R, A, B, C)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glm 3-FF.R")
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 484.7282 485.4541 486.4361 486.4901 486.7282 487.3525 487.5907
## models 13 10 16 9 15 17 11
## probs 0.2585369 0.1798506 0.1100679 0.1071386 0.09511429 0.0696105 0.06179579
##
## m13 glm(formula = R ~ B * C + A, family = binomial, data = data)
## m10 glm(formula = R ~ B * C, family = binomial, data = data)
## m16 glm(formula = R ~ A * C + B * C, family = binomial, data = data)
## m9 glm(formula = R ~ A * C, family = binomial, data = data)
## m15 glm(formula = R ~ A * B + B * C, family = binomial, data = data)
## m17 glm(formula = R ~ A * B + A * C + B * C, family = binomial, data = data)
## m11 glm(formula = R ~ A * B + C, family = binomial, data = data)
anova(m10, m13, test="Chisq") # Adding A*C marginally improves fit
## Analysis of Deviance Table
##
## Model 1: R ~ B * C
## Model 2: R ~ B * C + A
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 395 477.45
## 2 394 474.73 1 2.7258 0.09874 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
morph_male <-glm(flew_b~body_c* wing_c + thorax_c, family=binomial, data=data_male)
tidy_regression(morph_male, is_color=output_col)
## glm flew_b ~ body_c * wing_c + thorax_c binomial data_male
## AIC: 484.7282
## (Intercept) coeff: 1.0559015 Pr(>|t|): 1.132112e-13 *
## body_c coeff: -0.3355401 Pr(>|t|): 0.6353415
## wing_c coeff: 1.0577471 Pr(>|t|): 0.1386527
## thorax_c coeff: -1.8246782 Pr(>|t|): 0.1003827
## body_c:wing_c coeff: -0.6888439 Pr(>|t|): 0.00219101 *
no effect of body length
marginal negative effect of thorax length, where if longer thorax then less likely to fly
marginal positive effect of wing length, where if longer wing then more likely to fly
negative effect of body*wing where if longer body and wing then less likely to fly
morph_male_glmer <- glmer(flew_b ~ body_c* wing_c + thorax_c + (1|ID), family=binomial, data=data_male)
tidy_regression(morph_male_glmer, is_color=output_col) # model improves fit and it converges
## glmer flew_b ~ body_c * wing_c + thorax_c + (1 | ID) data_male binomial
## AIC: 466.6286 466.6286
## (Intercept) coeff: 1.7399068 Pr(>|t|): 8.026006e-07 *
## body_c coeff: -0.629109 Pr(>|t|): 0.610882
## wing_c coeff: 1.6562317 Pr(>|t|): 0.1870663
## thorax_c coeff: -2.6509841 Pr(>|t|): 0.1724853
## body_c:wing_c coeff: -1.1963799 Pr(>|t|): 0.004926184 *
no effect of body length
no effect of wing length
marginal negative effect of thorax length, where if longer thorax then less likely to fly
negative effect of body*wing where if longer body and wing then less likely to fly
R = data_male$flew_b
A = data_male$thorax_c
B = data_male$body_c
C = data_male$wing_c
D = data_male$days_from_start_c
X = data_male$ID # 3 models failed to converge when ID as RF
#Y = data_male$trial_type # more models fail if both ID and trial as RF
data<-data.frame(R, A, B, C, D, X)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-binomial glmer 1-RF + 4-FF-noD-interactions.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 456.4231 456.6314 457.1142 458.2684 458.2915 458.4077 458.9216
## models 27 24 22 25 32 33 20
## probs 0.2068326 0.186375 0.1464003 0.08220848 0.08126476 0.07667724 0.05930279
## [,8]
## AICs 458.9281
## models 26
## probs 0.05910996
##
## m27 R ~ B * C + A + D + (1 | X)
## m24 R ~ B * C + D + (1 | X)
## m22 R ~ A * C + D + (1 | X)
## m25 R ~ A * B + C + D + (1 | X)
## m32 R ~ A * B + B * C + D + (1 | X)
## m33 R ~ A * C + B * C + D + (1 | X)
## m20 R ~ A * B + D + (1 | X)
## m26 R ~ A * C + B + D + (1 | X)
length(errors$warnings)
## [1] 3
anova(m24, m27, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m24: R ~ B * C + D + (1 | X)
## m27: R ~ B * C + A + D + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m24 6 456.63 480.57 -222.32 444.63
## m27 7 456.42 484.35 -221.21 442.42 2.2083 1 0.1373
anova(m22, m26, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m22: R ~ A * C + D + (1 | X)
## m26: R ~ A * C + B + D + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m22 6 457.11 481.05 -222.56 445.11
## m26 7 458.93 486.85 -222.46 444.93 0.1861 1 0.6662
morph_male_glmer2 <- glmer(flew_b ~ thorax_c * wing_c + days_from_start_c + (1 | ID) , family=binomial, data=data_male)
tidy_regression(morph_male_glmer2, is_color=output_col)
## glmer flew_b ~ thorax_c * wing_c + days_from_start_c + (1 | ID) data_male binomial
## AIC: 457.1142 457.1142
## (Intercept) coeff: 1.8615926 Pr(>|t|): 6.746572e-06 *
## thorax_c coeff: -4.0162376 Pr(>|t|): 0.03497821 *
## wing_c coeff: 1.6375954 Pr(>|t|): 0.01600065 *
## days_from_start_c coeff: -0.0809056 Pr(>|t|): 0.001209843 *
## thorax_c:wing_c coeff: -4.4538238 Pr(>|t|): 0.01644018 *
# left off here
R = data_male$flew_b
A = data_male$wing2body_c
B = data_male$thorax_c
C = data_male$days_from_start_c
X = data_male$ID
Y = data_male$trial_type
data<-data.frame(R, A, B, C, X, Y)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-binomial glmer 2 RF + 3-FF-noCinteractions.R")
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## [,1] [,2] [,3] [,4] [,5]
## AICs 464.6018 466.4904 466.6018 466.9435 468.4904
## models 5 7 24 9 26
## probs 0.406782 0.1582219 0.1496467 0.1261457 0.05820658
##
## m5 R ~ A + C + (1 | X)
## m7 R ~ A + B + C + (1 | X)
## m24 R ~ A + C + (1 | X) + (1 | Y)
## m9 R ~ A * B + C + (1 | X)
## m26 R ~ A + B + C + (1 | X) + (1 | Y)
anova(m5, m7, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m5: R ~ A + C + (1 | X)
## m7: R ~ A + B + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m5 4 464.60 480.56 -228.30 456.60
## m7 5 466.49 486.44 -228.25 456.49 0.1114 1 0.7385
anova(m5, m24, test="Chisq") # Adding Y does not improve fit
## Data: data
## Models:
## m5: R ~ A + C + (1 | X)
## m24: R ~ A + C + (1 | X) + (1 | Y)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m5 4 464.6 480.56 -228.3 456.6
## m24 5 466.6 486.55 -228.3 456.6 0 1 1
morph_male_glmer3 <- glmer(flew_b ~ wing2body_c + days_from_start_c + (1 | ID), family=binomial, data=data_male)
tidy_regression(morph_male_glmer3, is_color=output_col)
## glmer flew_b ~ wing2body_c + days_from_start_c + (1 | ID) data_male binomial
## AIC: 464.6018 464.6018
## (Intercept) coeff: 1.3973254 Pr(>|t|): 4.776832e-06 *
## wing2body_c coeff: 36.5621338 Pr(>|t|): 0.0108387 *
## days_from_start_c coeff: -0.0765424 Pr(>|t|): 0.001669225 *
tidy_regression(nomass_male, is_color=output_col)
## glm flew_b ~ host_c + days_from_start_c + wing_c binomial data_male
## AIC: 488.5811
## (Intercept) coeff: 0.6820696 Pr(>|t|): 3.333341e-08 *
## host_c coeff: -0.2383709 Pr(>|t|): 0.05382225 .
## days_from_start_c coeff: -0.0482276 Pr(>|t|): 0.002981107 *
## wing_c coeff: 0.4764872 Pr(>|t|): 0.00973476 *
tidy_regression(mass_male, is_color=output_col)
## glm flew_b ~ host_c + mass_c + days_from_start_c + wing_c binomial data_male
## AIC: 479.0927
## (Intercept) coeff: 0.6597987 Pr(>|t|): 1.555244e-07 *
## host_c coeff: -0.3281229 Pr(>|t|): 0.01127145 *
## mass_c coeff: -68.3341293 Pr(>|t|): 0.0008482356 *
## days_from_start_c coeff: -0.0393589 Pr(>|t|): 0.01815356 *
## wing_c coeff: 0.9794922 Pr(>|t|): 6.066412e-05 *
tidy_regression(mass_male_glmer, is_color=output_col)
## glmer flew_b ~ host_c + mass_c + days_from_start_c + wing_c + (1 | population) + (1 | trial_type) data_male binomial
## AIC: 478.8491 478.8491
## (Intercept) coeff: 0.6241034 Pr(>|t|): 0.001564901 *
## host_c coeff: -0.2482033 Pr(>|t|): 0.219619
## mass_c coeff: -69.9405104 Pr(>|t|): 0.0007512309 *
## days_from_start_c coeff: -0.0405234 Pr(>|t|): 0.01652003 *
## wing_c coeff: 1.0751122 Pr(>|t|): 3.758597e-05 *
tidy_regression(morph_male, is_color=output_col)
## glm flew_b ~ body_c * wing_c + thorax_c binomial data_male
## AIC: 484.7282
## (Intercept) coeff: 1.0559015 Pr(>|t|): 1.132112e-13 *
## body_c coeff: -0.3355401 Pr(>|t|): 0.6353415
## wing_c coeff: 1.0577471 Pr(>|t|): 0.1386527
## thorax_c coeff: -1.8246782 Pr(>|t|): 0.1003827
## body_c:wing_c coeff: -0.6888439 Pr(>|t|): 0.00219101 *
tidy_regression(morph_male_glmer, is_color=output_col)
## glmer flew_b ~ body_c * wing_c + thorax_c + (1 | ID) data_male binomial
## AIC: 466.6286 466.6286
## (Intercept) coeff: 1.7399068 Pr(>|t|): 8.026006e-07 *
## body_c coeff: -0.629109 Pr(>|t|): 0.610882
## wing_c coeff: 1.6562317 Pr(>|t|): 0.1870663
## thorax_c coeff: -2.6509841 Pr(>|t|): 0.1724853
## body_c:wing_c coeff: -1.1963799 Pr(>|t|): 0.004926184 *
tidy_regression(morph_male_glmer2, is_color=output_col)
## glmer flew_b ~ thorax_c * wing_c + days_from_start_c + (1 | ID) data_male binomial
## AIC: 457.1142 457.1142
## (Intercept) coeff: 1.8615926 Pr(>|t|): 6.746572e-06 *
## thorax_c coeff: -4.0162376 Pr(>|t|): 0.03497821 *
## wing_c coeff: 1.6375954 Pr(>|t|): 0.01600065 *
## days_from_start_c coeff: -0.0809056 Pr(>|t|): 0.001209843 *
## thorax_c:wing_c coeff: -4.4538238 Pr(>|t|): 0.01644018 *
tidy_regression(morph_male_glmer3, is_color=output_col)
## glmer flew_b ~ wing2body_c + days_from_start_c + (1 | ID) data_male binomial
## AIC: 464.6018 464.6018
## (Intercept) coeff: 1.3973254 Pr(>|t|): 4.776832e-06 *
## wing2body_c coeff: 36.5621338 Pr(>|t|): 0.0108387 *
## days_from_start_c coeff: -0.0765424 Pr(>|t|): 0.001669225 *