Flight Trials Winter 2020 Dataset was conducted from 2/17/2020 - 3/10/2020. Soapberry bugs were flight tested twice for multiple hours in the flight mill and observed from 8 AM to (5-8 PM) each day. Used multivariate (glm) and mixed effect modeling (glmer) to analyze the flight results.
Cleaning the Data
rm(list=ls())
output_col = FALSE # Recommend changing this to TRUE if working in Base R or RStudio, and FALSE if generating an html
source("src/clean_flight_data.R") # Script that loads and cleans up the data
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
source("src/regression_output.R") # A script that cleans up regression outputs and prints in color or black and white
source("src/center_flight_data.R")
source("get_warnings.R")
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
### Remove everyone who didn't fly (then remove distances = 0, if that didn't work fully)
data_flew <- data_tested[data_tested$flew_b == 1, ]
data_flew <- data_flew[data_flew$distance > 0, ]
data_flew <- center_data(data_flew)
###Break up by trial type
d_T1 <-data_flew[data_flew$trial_type=="T1",]
d_T1 <- center_data(d_T1)
d_T2 <-data_flew[data_flew$trial_type=="T2",]
d_T2 <- center_data(d_T2)
###Break up by sex
d_fem <-data_flew[data_flew$sex=="F",]
d_fem <- center_data(d_fem)
d_male <-data_flew[data_flew$sex=="M",]
d_male <- center_data(d_male)
h1 <- as.grob(expression(
hist(data_flew$distance)))
p1 <- as.grob(expression(
plot(distance ~ mass_c, data=data_flew)))
p2 <- as.grob(expression(
plot(distance ~ days_from_start_c, data=data_flew)))
p3 <- as.grob(expression(
plot(distance ~ min_from_IncStart, data=data_flew)))
p4 <- as.grob(expression(
plot(distance ~ wing_c, data=data_flew)))
p5 <- as.grob(expression(
plot(distance ~ beak_c, data=data_flew)))
p6 <- as.grob(expression(
plot(distance ~ thorax_c, data=data_flew)))
p7 <- as.grob(expression(
plot(distance ~ body_c, data=data_flew)))
grid.arrange(h1,p1,p2,p3,p4,p5,p6,p7, ncol=3)
gf_point(distance~sym_dist, col=~host_plant, alpha=~sex_c, data=data_flew)
summary <- aggregate(distance~sym_dist*sex*host_plant, data=data_flew, FUN=mean)
plot(summary$distance~summary$sym_dist,
col=c(1,2)[as.factor(summary$sex)], # Female = Black, Male = Red
pch=c(19,22)[as.factor(summary$host_plant)], # Filled circle is GRT, Open square is BV
main="Observed Data: distance ~ sex*host_plant*sym_dist",
xlab = "Distance from Sympatric Zone (°)",
ylab= "Distance Flew",
#sub=eq_glmer
)
legend("topright",
legend = c("F and K.elegans", "M and C.corindum", "F and C.corindum","M and K.elegans"),
#inset=c(-0.27,0.2),
col= 1:2,
pch = c(0,16,19),
title="Groups")
p <- ggplot(data_flew, aes(x=sym_dist, y=distance, color=host_plant)) +
geom_violin()
p + stat_summary(fun=mean, geom="point", shape=23, size=2)
## Warning: position_dodge requires non-overlapping x intervals
rm(list=ls())
output_col = FALSE # Recommend changing this to TRUE if working in Base R or RStudio, and FALSE if generating an html
source("src/clean_flight_data.R") # Script that loads and cleans up the data
source("src/regression_output.R") # A script that cleans up regression outputs and prints in color or black and white
source("src/center_flight_data.R")
source("get_warnings.R")
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
data_flew <- data_tested[data_tested$flew_b == 1, ]
data_flew <- center_data(data_flew)
d <- data_flew %>%
group_by(ID, sex,population, site, host_plant, latitude, longitude, total_eggs,
beak, thorax, wing, body, w_morph, morph_notes, tested,
host_c, sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c,
beak_c, thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c, wing2body_s) %>%
summarise_all(funs(list(na.omit(.))))
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
d$num_flew <- 0
d$num_notflew <- 0
d$average_mass <- 0
d$egg_diff <- 0
d$mass_diff <- 0
d$flew_diff <- 0
d$dist_diff <- 0
d$speed_diff <- 0
for(row in 1:length(d$flew_b)){
n_flew <- sum(d$flew_b[[row]] == 1) # total number of times did fly among trails
d$num_flew[[row]] <- n_flew
n_notflew <- sum(d$flew_b[[row]] == 0) # total number of times did not fly among trails
d$num_notflew[[row]] <- n_notflew
avg_mass <- mean(d$mass[[row]])
d$average_mass[[row]] <- avg_mass
# mass, flight, distance, and speed changes between T1 and T2
d$mass_diff[[row]] <- d$mass[[row]][2] - d$mass[[row]][1] # T2 - T1
d$flew_diff[[row]] <- d$flew_b[[row]][2] - d$flew_b[[row]][1] # T2 - T1
d$dist_diff[[row]] <- d$distance[[row]][2] - d$distance[[row]][1] # T2 - T1
d$speed_diff[[row]] <- d$average_speed[[row]][2] - d$average_speed[[row]][1] # T2 - T1
d$egg_diff[[row]] <- d$eggs_b[[row]][2] - d$eggs_b[[row]][1] # T2 - T1
}
d <- select(d, -filename, -channel_letter, -set_number)
d # NA's generated are good because that means it's accounted only for bugs that flew in both trials
## # A tibble: 211 x 68
## # Groups: ID, sex, population, site, host_plant, latitude, longitude,
## # total_eggs, beak, thorax, wing, body, w_morph, morph_notes, tested, host_c,
## # sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, beak_c,
## # thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c [211]
## ID sex population site host_plant latitude longitude total_eggs beak
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 1 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.5
## 2 2 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.64
## 3 3 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.75
## 4 4 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 6.21
## 5 6 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 5.76
## 6 7 F Plantatio… Foun… C. corind… 25.0 -80.6 8 8.19
## 7 9 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 6.1
## 8 10 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 6.09
## 9 11 M Key Largo KLMRL C. corind… 25.1 -80.4 NA 5.88
## 10 12 F Key Largo KLMRL C. corind… 25.1 -80.4 329 8.79
## # … with 201 more rows, and 59 more variables: thorax <dbl>, wing <dbl>,
## # body <dbl>, w_morph <fct>, morph_notes <fct>, tested <fct>, host_c <dbl>,
## # sex_c <dbl>, w_morph_c <dbl>, lat_c <dbl>, sym_dist <dbl>,
## # sym_dist_s <dbl>, total_eggs_c <dbl>, beak_c <dbl>, thorax_c <dbl>,
## # thorax_s <dbl>, body_c <dbl>, wing_c <dbl>, wing2body <dbl>,
## # wing2body_c <dbl>, wing2body_s <dbl>, trial_type <list>, chamber <list>,
## # average_speed <list>, total_flight_time <list>, distance <list>,
## # shortest_flying_bout <list>, longest_flying_bout <list>,
## # total_duration <list>, max_speed <list>, test_date <list>,
## # time_start <list>, time_end <list>, flew <list>, flight_type <list>,
## # mass <list>, EWM <list>, flew_b <list>, eggs_b <list>,
## # minute_duration <list>, minute_duration_c <list>, min_from_IncStart <list>,
## # min_from_IncStart_c <list>, min_from_IncStart_s <list>,
## # days_from_start <list>, days_from_start_c <list>, mass_c <list>,
## # mass_s <list>, average_mass <dbl>, average_mass_c <list>,
## # average_mass_s <list>, trial_type_og <list>, num_flew <dbl>,
## # num_notflew <dbl>, egg_diff <dbl>, mass_diff <dbl>, flew_diff <dbl>,
## # dist_diff <dbl>, speed_diff <dbl>
# Filter out bugs that ONLY flew in T1:
rows_remove <- c()
for (row in 1:nrow(d)){
if (length(d$trial_type[[row]]) < 2) {
rows_remove <- c(rows_remove, row)
}
}
d <- d[-rows_remove, ]
d
## # A tibble: 132 x 68
## # Groups: ID, sex, population, site, host_plant, latitude, longitude,
## # total_eggs, beak, thorax, wing, body, w_morph, morph_notes, tested, host_c,
## # sex_c, w_morph_c, lat_c, sym_dist, sym_dist_s, total_eggs_c, beak_c,
## # thorax_c, thorax_s, body_c, wing_c, wing2body, wing2body_c [132]
## ID sex population site host_plant latitude longitude total_eggs beak
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 1 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.5
## 2 3 M Plantatio… Areg… C. corind… 25.0 -80.6 NA 5.75
## 3 6 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 5.76
## 4 7 F Plantatio… Foun… C. corind… 25.0 -80.6 8 8.19
## 5 10 M Plantatio… Foun… C. corind… 25.0 -80.6 NA 6.09
## 6 11 M Key Largo KLMRL C. corind… 25.1 -80.4 NA 5.88
## 7 15 M Key Largo JP C. corind… 25.1 -80.4 NA 6.53
## 8 17 M Key Largo JP C. corind… 25.1 -80.4 NA 6.18
## 9 19 F Key Largo JP C. corind… 25.1 -80.4 NA 7.54
## 10 21 M North Key… DD f… C. corind… 25.3 -80.3 NA 6.03
## # … with 122 more rows, and 59 more variables: thorax <dbl>, wing <dbl>,
## # body <dbl>, w_morph <fct>, morph_notes <fct>, tested <fct>, host_c <dbl>,
## # sex_c <dbl>, w_morph_c <dbl>, lat_c <dbl>, sym_dist <dbl>,
## # sym_dist_s <dbl>, total_eggs_c <dbl>, beak_c <dbl>, thorax_c <dbl>,
## # thorax_s <dbl>, body_c <dbl>, wing_c <dbl>, wing2body <dbl>,
## # wing2body_c <dbl>, wing2body_s <dbl>, trial_type <list>, chamber <list>,
## # average_speed <list>, total_flight_time <list>, distance <list>,
## # shortest_flying_bout <list>, longest_flying_bout <list>,
## # total_duration <list>, max_speed <list>, test_date <list>,
## # time_start <list>, time_end <list>, flew <list>, flight_type <list>,
## # mass <list>, EWM <list>, flew_b <list>, eggs_b <list>,
## # minute_duration <list>, minute_duration_c <list>, min_from_IncStart <list>,
## # min_from_IncStart_c <list>, min_from_IncStart_s <list>,
## # days_from_start <list>, days_from_start_c <list>, mass_c <list>,
## # mass_s <list>, average_mass <dbl>, average_mass_c <list>,
## # average_mass_s <list>, trial_type_og <list>, num_flew <dbl>,
## # num_notflew <dbl>, egg_diff <dbl>, mass_diff <dbl>, flew_diff <dbl>,
## # dist_diff <dbl>, speed_diff <dbl>
## Test
tidy_regression(lm(dist_diff ~ mass_diff, data=d), is_color = output_col) # no effect of mass_diff
## lm dist_diff ~ mass_diff d
## AIC: 2637.914
## (Intercept) coeff: -179.8179401 Pr(>|t|): 0.7000694
## mass_diff coeff: -36007.8382449 Pr(>|t|): 0.3411103
hist(d$dist_diff)
No mass_diff
R = d$dist_diff
A = d$host_c
B = d$sex_c
C = d$sym_dist
D = d$mass_diff
X = d$population
data<-data.frame(R, A, B, C, D, X)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
## [,1]
## AICs 2624.771
## models 18
## probs 0.997518
##
## m0 R ~ (1 | X)
dist_model <- lmer(speed_diff ~ 1 + (1 | population), data=d, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(dist_model, is_color=output_col)
## lmer speed_diff ~ 1 + (1 | population) d FALSE
## AIC: -93.55941 -93.55941
## 1 coeff: 0.012506 tval: 0.865809
s.test <- paste("pval: ", shapiro.test(residuals(dist_model))$p.value)
qqnorm(resid(dist_model))
qqline(resid(dist_model))
text(-1, 0.1, s.test)
Mass_diff
d <- d %>%
filter(!is.na(mass_diff))
R = d$dist_diff
A = d$host_c
B = d$sex_c
C = d$sym_dist
D = d$mass_diff
X = d$population
data<-data.frame(R, A, B, C, D, X)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-gaussian glmer 1-RF + 4-FF REMLF.R")
## [,1] [,2] [,3] [,4] [,5]
## AICs 2638.837 2639.786 2639.914 2640.539 2640.713
## models 113 2 4 3 1
## probs 0.1337868 0.08327474 0.07810996 0.05713923 0.0523805
##
## m0 R ~ 1 + (1 | X)
## m2 R ~ B + (1 | X)
## m4 R ~ D + (1 | X)
## m3 R ~ C + (1 | X)
## m1 R ~ A + (1 | X)
anova(m0, m2, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2638.8 2647.5 -1316.4 2632.8
## m2 4 2639.8 2651.3 -1315.9 2631.8 1.0518 1 0.3051
anova(m0, m4, test="Chisq")
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m4: R ~ D + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2638.8 2647.5 -1316.4 2632.8
## m4 4 2639.9 2651.4 -1316.0 2631.9 0.9237 1 0.3365
anova(m0, m3, test="Chisq")
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2638.8 2647.5 -1316.4 2632.8
## m3 4 2640.5 2652.1 -1316.3 2632.5 0.2985 1 0.5848
anova(m0, m1, test="Chisq")
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2638.8 2647.5 -1316.4 2632.8
## m1 4 2640.7 2652.2 -1316.4 2632.7 0.1246 1 0.7241
Null is the best model again
d_fem <- d[d$sex == "F",] # very low sample size
No mass_diff
R = d_fem$dist_diff
A = d_fem$host_c
B = d_fem$sym_dist
C = d_fem$mass_diff
X = d_fem$population
data<-data.frame(R, A, B, C, X)
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-gaussian glmer 1-RF + 2-FF REMLF.R")
## [,1] [,2] [,3] [,4]
## AICs 534.8694 536.3361 536.7746 538.3341
## models 5 1 2 3
## probs 0.4740489 0.2276819 0.1828578 0.08384723
##
## m0 R ~ (1 | X)
## m1 R ~ A + (1 | X)
## m2 R ~ B + (1 | X)
## m3 R ~ A + B + (1 | X)
anova(m0, m1, test="Chisq")
## Data: data
## Models:
## m0: R ~ (1 | X)
## m1: R ~ A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 534.87 538.64 -264.44 528.87
## m1 4 536.34 541.37 -264.17 528.34 0.5333 1 0.4652
anova(m0, m2, test="Chisq")
## Data: data
## Models:
## m0: R ~ (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 534.87 538.64 -264.44 528.87
## m2 4 536.77 541.81 -264.39 528.77 0.0948 1 0.7582
mass_diff
source("src/compare_models.R")
model_comparisonsAIC("src/generic models-gaussian glmer 1-RF + 3-FF REMLF.R")
## [,1]
## AICs 518.7669
## models 18
## probs 0.9991563
##
## m0 R ~ (1 | X)
Null is the best model for both again.
dist_model <- lmer(speed_diff ~ 1 + (1 | population), data=d_fem, REML=FALSE)
## boundary (singular) fit: see ?isSingular
tidy_regression(dist_model, is_color=output_col)
## lmer speed_diff ~ 1 + (1 | population) d_fem FALSE
## AIC: -13.79848 -13.79848
## 1 coeff: -0.0399724 tval: -1.232642
s.test <- paste("pval: ", shapiro.test(residuals(dist_model))$p.value)
qqnorm(resid(dist_model))
qqline(resid(dist_model))
text(-1, 0.1, s.test)
Cleaning the Data
rm(list=ls())
output_col = FALSE # Recommend changing this to TRUE if working in Base R or RStudio, and FALSE if generating an html
source("src/clean_flight_data.R") # Script that loads and cleans up the data
source("src/regression_output.R") # A script that cleans up regression outputs and prints in color or black and white
source("src/center_flight_data.R")
source("get_warnings.R")
data <- read_flight_data("data/all_flight_data-Winter2020.csv")
data_all <- data[[1]]
data_tested <- data[[2]]
### Remove everyone who didn't fly (then remove distances = 0, if that didn't work fully)
data_flew <- data_tested[data_tested$flew_b == 1, ]
data_flew <- data_flew[data_flew$distance > 0, ]
data_flew <- center_data(data_flew)
###Break up by trial type
d_T1 <-data_flew[data_flew$trial_type=="T1",]
d_T1 <- center_data(d_T1)
d_T2 <-data_flew[data_flew$trial_type=="T2",]
d_T2 <- center_data(d_T2)
###Break up by sex
d_fem <-data_flew[data_flew$sex=="F",]
d_fem <- center_data(d_fem)
d_male <-data_flew[data_flew$sex=="M",]
d_male <- center_data(d_male)
Testing link functions
model_test<-glm(distance~chamber, data=d_T1, family=Gamma(link="log")) #equivalent to using log link function in Gamma is to log transform distance, except it won't fuss about non-0 values
summary(model_test)
##
## Call:
## glm(formula = distance ~ chamber, family = Gamma(link = "log"),
## data = d_T1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.8615 -2.8054 -1.1656 0.2911 3.5908
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.0348 0.5844 15.459 < 2e-16 ***
## chamberA-2 -0.8534 0.6785 -1.258 0.21003
## chamberA-3 -1.4615 0.6578 -2.222 0.02747 *
## chamberA-4 -1.9880 0.6445 -3.084 0.00234 **
## chamberB-1 -0.3511 0.6967 -0.504 0.61493
## chamberB-2 -1.6037 0.6555 -2.446 0.01534 *
## chamberB-3 -1.4753 0.6683 -2.207 0.02849 *
## chamberB-4 -1.3362 0.6749 -1.980 0.04915 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 2.732598)
##
## Null deviance: 940.33 on 197 degrees of freedom
## Residual deviance: 874.46 on 190 degrees of freedom
## AIC: 3166
##
## Number of Fisher Scoring iterations: 18
plot(model_test)
Testing experimental covariates
####### Effect of chamber A-4
d_T1$chamber <- relevel(d_T1$chamber, ref="A-2")
tidy_regression(glm(distance~chamber, data=d_T1, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ chamber Gamma(link = "log") d_T1
## AIC: 3166.011
## (Intercept) coeff: 8.1813765 Pr(>|t|): 9.757755e-59 *
## chamberA-1 coeff: 0.8534011 Pr(>|t|): 0.2100268
## chamberA-3 coeff: -0.6081293 Pr(>|t|): 0.1859764
## chamberA-4 coeff: -1.1346337 Pr(>|t|): 0.0104879 *
## chamberB-1 coeff: 0.5023454 Pr(>|t|): 0.3282162
## chamberB-2 coeff: -0.7502815 Pr(>|t|): 0.1007505
## chamberB-3 coeff: -0.6218561 Pr(>|t|): 0.1903703
## chamberB-4 coeff: -0.4827901 Pr(>|t|): 0.3181477
####### No effect of test date
tidy_regression(glm(distance~days_from_start_c, data=d_T1, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ days_from_start_c Gamma(link = "log") d_T1
## AIC: 3174.778
## (Intercept) coeff: 7.8677051 Pr(>|t|): 4.454649e-138 *
## days_from_start_c coeff: 0.0108183 Pr(>|t|): 0.7178363
####### Effect of Minutes from IncStart time
tidy_regression(glm(distance~min_from_IncStart_c, data=d_T1, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ min_from_IncStart_c Gamma(link = "log") d_T1
## AIC: 3170.724
## (Intercept) coeff: 7.8346109 Pr(>|t|): 5.567605e-136 *
## min_from_IncStart_c coeff: -0.001999 Pr(>|t|): 0.01891553 *
Without Mass
data<-data.frame(R=d_T1$distance, A=d_T1$host_c, B=d_T1$sex_c, C=d_T1$sym_dist, X=d_T1$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 3179.826 3180.214 3180.372 3180.384 3181.018 3181.594 3181.835
## models 2 18 4 1 6 3 5
## probs 0.1636526 0.1348062 0.1245254 0.1238182 0.0901593 0.06760594 0.05993218
##
## m2 R ~ B + (1 | X)
## m0 R ~ 1 + (1 | X)
## m4 R ~ A + B + (1 | X)
## m1 R ~ A + (1 | X)
## m6 R ~ B + C + (1 | X)
## m3 R ~ C + (1 | X)
## m5 R ~ A + C + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 8
anova(m2, m6, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m2: R ~ B + (1 | X)
## m6: R ~ B + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 4 3179.8 3193.0 -1585.9 3171.8
## m6 5 3181.0 3197.5 -1585.5 3171.0 0.8077 1 0.3688
anova(m2, m4, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m2: R ~ B + (1 | X)
## m4: R ~ A + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 4 3179.8 3193.0 -1585.9 3171.8
## m4 5 3180.4 3196.8 -1585.2 3170.4 1.4535 1 0.228
best.fit <- glm(distance ~ sex_c, family = Gamma(link = "log"), data = d_T1)
tidy_regression(best.fit, is_color=output_col)
## glm distance ~ sex_c Gamma(link = "log") d_T1
## AIC: 3172.675
## (Intercept) coeff: 7.9697328 Pr(>|t|): 1.143642e-124 *
## sex_c coeff: 0.2186048 Pr(>|t|): 0.1163446
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
With Mass
d_T1 <- d_T1 %>%
filter(!is.na(mass))
d_T1 <- center_data(d_T1)
#data<-data.frame(R=d_T1$distance, A=d_T1$host_c, B=d_T1$sex_c, C=d_T1$sym_dist_s, D=d_T1$mass_s, X=d_T1$chamber)
data<-data.frame(R=d_T1$distance, A=d_T1$host_c, B=d_T1$sex_c, C=d_T1$mass_c, X=d_T1$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5]
## AICs 3162.73 3162.968 3164.265 3164.571 3165.18
## models 13 10 16 15 2
## probs 0.2180667 0.1935885 0.101244 0.086852 0.0640745
##
## m13 R ~ B * C + A + (1 | X)
## m10 R ~ B * C + (1 | X)
## m16 R ~ A * C + B * C + (1 | X)
## m15 R ~ A * B + B * C + (1 | X)
## m2 R ~ B + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings)) # 50 models failed to converge if keep sym_dist_s, but it doesn't show up in top models so can remove it | After removing it get 3 models that fail to converge
## Number of models that failed to converge: 3
# anova(m20, m30, test="Chisq") # Adding A does not improve fit
# anova(m20, m31, test="Chisq") # Adding C does not improve fit
# anova(m30, m50, test="Chisq") # Adding A*D does not improve fit
anova(m10, m13, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m10: R ~ B * C + (1 | X)
## m13: R ~ B * C + A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m10 6 3163.0 3182.7 -1575.5 3151.0
## m13 7 3162.7 3185.7 -1574.4 3148.7 2.2381 1 0.1346
best.fit <- glm(distance ~ sex_c * mass_c, family = Gamma(link = "log"), data = d_T1) # model failed to converge using glmer and (1|chamber)
#best.fit <- glmer(distance ~ sex_c * mass_s + (1|chamber), family = Gamma(link = "log"), data = d_T1)# model still fails to converge even when you standardize mass.
tidy_regression(best.fit, is_color=output_col)
## glm distance ~ sex_c * mass_c Gamma(link = "log") d_T1
## AIC: 3157.876
## (Intercept) coeff: 8.4477883 Pr(>|t|): 6.968261e-93 *
## sex_c coeff: 0.6128963 Pr(>|t|): 0.005508187 *
## mass_c coeff: -14.6019597 Pr(>|t|): 0.2256213
## sex_c:mass_c coeff: -27.4335186 Pr(>|t|): 0.0234709 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
Min From Inc Start
# sym dist and host isn't really showing up before so left them out
# less models fail if use standardized variables and it fixes the scaling issues.
data<-data.frame(R=d_T1$distance, A=d_T1$min_from_IncStart_s, B=d_T1$sex_c, C=d_T1$mass_s, X=d_T1$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5]
## AICs 3162.104 3162.11 3162.968 3163.432 3163.979
## models 15 13 10 16 17
## probs 0.1993194 0.1987817 0.1294139 0.1026496 0.07807552
##
## m15 R ~ A * B + B * C + (1 | X)
## m13 R ~ B * C + A + (1 | X)
## m10 R ~ B * C + (1 | X)
## m16 R ~ A * C + B * C + (1 | X)
## m17 R ~ A * B + A * C + B * C + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 5
best.fit <- glmer(distance ~ min_from_IncStart_s * sex_c + sex_c * mass_s + (1|chamber), family = Gamma(link = "log"), data = d_T1) # converges!
tidy_regression(best.fit, is_color=output_col)
## glmer distance ~ min_from_IncStart_s * sex_c + sex_c * mass_s + (1 | chamber) d_T1 Gamma(link = "log")
## AIC: 3162.104 3162.104
## (Intercept) coeff: 8.6146645 Pr(>|t|): 8.536496e-142 *
## min_from_IncStart_s coeff: -0.3961186 Pr(>|t|): 0.02045934 *
## sex_c coeff: 0.720724 Pr(>|t|): 0.01537051 *
## mass_s coeff: -0.2880028 Pr(>|t|): 0.2755949
## min_from_IncStart_s:sex_c coeff: -0.2386356 Pr(>|t|): 0.1401422
## sex_c:mass_s coeff: -0.7516082 Pr(>|t|): 0.00550398 *
negative effect of min from start, where the later in the day the bug is tested, the less distance the bug will fly
positive effect from sex where if F then more likely to disperse far
no effect of mass
negative effect of min from start*sex, where if F and tested late in the day then less likely to disperse far
negative effect of sex*mass where the more mass and F then less likely to disperse far
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
Morphology
d_T1 <- d_T1 %>%
filter(!is.na(body))
d_T1 <- center_data(d_T1)
data<-data.frame(R=d_T1$distance, A=d_T1$body_c, B=d_T1$wing_c, C=d_T1$thorax_c, X=d_T1$chamber) # singular fit if run with min_from_IncStart meaning the variance is near 0
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 3157.964 3158.236 3158.569 3159.445 3159.584 3159.817
## models 1 2 5 9 8 6
## probs 0.1708906 0.1491773 0.1262721 0.08152204 0.07604676 0.06766991
## [,7]
## AICs 3159.921
## models 4
## probs 0.06422979
##
## m1 R ~ A + (1 | X)
## m2 R ~ B + (1 | X)
## m5 R ~ A + C + (1 | X)
## m9 R ~ A * C + (1 | X)
## m8 R ~ A * B + (1 | X)
## m6 R ~ B + C + (1 | X)
## m4 R ~ A + B + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 6
anova(m1, m4, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m1: R ~ A + (1 | X)
## m4: R ~ A + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 4 3158.0 3171.1 -1575 3150.0
## m4 5 3159.9 3176.3 -1575 3149.9 0.0429 1 0.8359
anova(m1, m6, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m1: R ~ A + (1 | X)
## m6: R ~ B + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 4 3158.0 3171.1 -1575.0 3150.0
## m6 5 3159.8 3176.2 -1574.9 3149.8 0.1472 1 0.7012
best.fit <- glmer(distance ~ body_c + (1|chamber), family = Gamma(link = "log"), data = d_T1) # model failed to converge if body is centered or standardized
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0313566 (tol = 0.001, component 1)
tidy_regression(best.fit, is_color=output_col)
## glmer distance ~ body_c + (1 | chamber) d_T1 Gamma(link = "log")
## AIC: 3157.964 3157.964
## (Intercept) coeff: 7.790395 Pr(>|t|): 0 *
## body_c coeff: 0.4503914 Pr(>|t|): 0 *
d_T1 <- d_T1 %>%
filter(!is.na(mass))
d_T1 <- center_data(d_T1)
data<-data.frame(R=d_T1$distance, A=d_T1$wing2body_c, B=d_T1$thorax_c, X=d_T1$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 2-FF log link.R"))
## [,1] [,2] [,3] [,4]
## AICs 3162.043 3163.911 3165.731 3165.802
## models 2 3 4 5
## probs 0.5678268 0.2231618 0.08984961 0.08667887
##
## m2 R ~ B + (1 | X)
## m3 R ~ A + B + (1 | X)
## m4 R ~ A * B + (1 | X)
## m0 R ~ 1 + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 1
anova(m2, m4, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m2: R ~ B + (1 | X)
## m4: R ~ A * B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m2 4 3162.0 3175.2 -1577.0 3154.0
## m4 6 3165.7 3185.4 -1576.9 3153.7 0.3126 2 0.8553
anova(m0, m2, test="Chisq") # Adding B does improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 3165.8 3175.7 -1579.9 3159.8
## m2 4 3162.0 3175.2 -1577.0 3154.0 5.7592 1 0.0164 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m3, m4, test="Chisq") # Adding A*B does not improve fit
## Data: data
## Models:
## m3: R ~ A + B + (1 | X)
## m4: R ~ A * B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m3 5 3163.9 3180.3 -1577.0 3153.9
## m4 6 3165.7 3185.4 -1576.9 3153.7 0.1805 1 0.671
best.fit <- glmer(distance ~ thorax_c +(1|chamber), family = Gamma(link = "log"), data = d_T1)
tidy_regression(best.fit, is_color=output_col)
## glmer distance ~ thorax_c + (1 | chamber) d_T1 Gamma(link = "log")
## AIC: 3162.043 3162.043
## (Intercept) coeff: 7.818454 Pr(>|t|): 2.975171e-301 *
## thorax_c coeff: 1.1417757 Pr(>|t|): 0.02205273 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
h1 <- as.grob(expression(
hist(d_T1$distance)))
p1 <- as.grob(expression(
plot(distance ~ mass_c, data=d_T1)))
p2 <- as.grob(expression(
plot(distance ~ days_from_start_c, data=d_T1)))
p3 <- as.grob(expression(
plot(distance ~ min_from_IncStart, data=d_T1)))
p4 <- as.grob(expression(
plot(distance ~ wing_c, data=d_T1)))
p5 <- as.grob(expression(
plot(distance ~ beak_c, data=d_T1)))
p6 <- as.grob(expression(
plot(distance ~ thorax_c, data=d_T1)))
p7 <- as.grob(expression(
plot(distance ~ body_c, data=d_T1)))
grid.arrange(h1,p1,p2,p3,p4,p5,p6,p7, ncol=3)
gf_point(distance~sym_dist, col=~host_plant, alpha=~sex_c, data=d_T1)
summary <- aggregate(distance~sym_dist*sex*host_plant, data=d_T1, FUN=mean)
plot(summary$distance~summary$sym_dist,
col=c(1,2)[as.factor(summary$sex)], # Female = Black, Male = Red
pch=c(19,22)[as.factor(summary$host_plant)], # Filled circle is GRT, Open square is BV
main="T1 Observed Data: distance ~ sex*host_plant*sym_dist",
xlab = "Distance from Sympatric Zone (°)",
ylab= "Distance Flew",
#sub=eq_glmer
)
legend("topright",
legend = c("F and K.elegans", "M and C.corindum", "F and C.corindum","M and K.elegans"),
#inset=c(-0.27,0.2),
col= 1:2,
pch = c(0,16,19),
title="Groups")
Testing link functions
model_test<-glm(distance~chamber, data=d_T2, family=Gamma(link="log"))
summary(model_test)
##
## Call:
## glm(formula = distance ~ chamber, family = Gamma(link = "log"),
## data = d_T2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.8498 -2.8198 -1.1911 0.1958 3.0538
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.63088 0.36802 20.735 <2e-16 ***
## chamberA-2 0.03823 0.53472 0.072 0.943
## chamberA-3 0.63517 0.54294 1.170 0.244
## chamberA-4 -0.73274 0.50850 -1.441 0.152
## chamberB-1 0.78258 0.57352 1.365 0.175
## chamberB-2 0.07789 0.52046 0.150 0.881
## chamberB-3 0.31483 0.60098 0.524 0.601
## chamberB-4 -0.41279 0.55203 -0.748 0.456
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 2.708818)
##
## Null deviance: 658.2 on 138 degrees of freedom
## Residual deviance: 628.1 on 131 degrees of freedom
## AIC: 2208.6
##
## Number of Fisher Scoring iterations: 25
plot(model_test)
Testing experimental Covariates
####### No effect of chamber
tidy_regression(glm(distance~chamber, data=d_T2, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ chamber Gamma(link = "log") d_T2
## AIC: 2208.622
## (Intercept) coeff: 7.6308764 Pr(>|t|): 3.364553e-43 *
## chamberA-2 coeff: 0.0382343 Pr(>|t|): 0.9431066
## chamberA-3 coeff: 0.6351714 Pr(>|t|): 0.2441751
## chamberA-4 coeff: -0.732738 Pr(>|t|): 0.1519739
## chamberB-1 coeff: 0.7825848 Pr(>|t|): 0.1747422
## chamberB-2 coeff: 0.077891 Pr(>|t|): 0.8812652
## chamberB-3 coeff: 0.3148289 Pr(>|t|): 0.601261
## chamberB-4 coeff: -0.4127859 Pr(>|t|): 0.4559488
####### No effect of test date
tidy_regression(glm(distance~days_from_start, data=d_T2, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ days_from_start Gamma(link = "log") d_T2
## AIC: 2205.78
## (Intercept) coeff: 7.283076 Pr(>|t|): 2.764261e-18 *
## days_from_start coeff: 0.0351562 Pr(>|t|): 0.4903084
####### No effect of Minutes from IncStart time
tidy_regression(glm(distance~min_from_IncStart, data=d_T2, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ min_from_IncStart Gamma(link = "log") d_T2
## AIC: 2205.068
## (Intercept) coeff: 8.0161962 Pr(>|t|): 4.769248e-67 *
## min_from_IncStart coeff: -0.0012413 Pr(>|t|): 0.207321
Without Mass
data<-data.frame(R=d_T2$distance, A=d_T2$host_c, B=d_T2$sex_c, C=d_T2$sym_dist, X=d_T2$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 2208.082 2209.965 2210.051 2210.063 2211.916 2211.935
## models 18 1 2 3 5 4
## probs 0.3483625 0.1358679 0.1301166 0.1293592 0.0512112 0.05074772
##
## m0 R ~ 1 + (1 | X)
## m1 R ~ A + (1 | X)
## m2 R ~ B + (1 | X)
## m3 R ~ C + (1 | X)
## m5 R ~ A + C + (1 | X)
## m4 R ~ A + B + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 0
anova(m0, m1, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2208.1 2216.9 -1101 2202.1
## m1 4 2210.0 2221.7 -1101 2202.0 0.1169 1 0.7324
anova(m1, m4, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m1: R ~ A + (1 | X)
## m4: R ~ A + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 4 2210.0 2221.7 -1101 2202.0
## m4 5 2211.9 2226.6 -1101 2201.9 0.0304 1 0.8617
anova(m1, m5, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m1: R ~ A + (1 | X)
## m5: R ~ A + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 4 2210.0 2221.7 -1101 2202.0
## m5 5 2211.9 2226.6 -1101 2201.9 0.0485 1 0.8256
best.fit <- glm(distance ~ 1, family = Gamma(link = "log"), data = d_T2)
tidy_regression(best.fit, is_color=output_col)
## glm distance ~ 1 Gamma(link = "log") d_T2
## AIC: 2204.082
## 1 coeff: 7.7754958 Pr(>|t|): 1.47395e-96 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
With Mass
d_T2 <- d_T2 %>%
filter(!is.na(mass))
d_T2 <- center_data(d_T2)
data<-data.frame(R=d_T2$distance, A=d_T2$host_c, B=d_T2$sex_c, C=d_T2$sym_dist, D=d_T2$mass_c, X=d_T2$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 4-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 2208.082 2209.543 2209.965 2209.972 2210.051 2210.063
## models 113 4 1 9 2 3
## probs 0.1542606 0.07430719 0.06016456 0.0599454 0.05761776 0.05728237
## [,7]
## AICs 2210.177
## models 20
## probs 0.05410542
##
## m0 R ~ 1 + (1 | X)
## m4 R ~ D + (1 | X)
## m1 R ~ A + (1 | X)
## m9 R ~ B + D + (1 | X)
## m2 R ~ B + (1 | X)
## m3 R ~ C + (1 | X)
## m20 R ~ B * D + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 2
anova(m0, m4, test="Chisq") # Adding D does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m4: R ~ D + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2208.1 2216.9 -1101.0 2202.1
## m4 4 2209.5 2221.3 -1100.8 2201.5 0.5391 1 0.4628
anova(m0, m1, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2208.1 2216.9 -1101 2202.1
## m1 4 2210.0 2221.7 -1101 2202.0 0.1169 1 0.7324
anova(m0, m3, test="Chisq") # Ading C does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2208.1 2216.9 -1101 2202.1
## m3 4 2210.1 2221.8 -1101 2202.1 0.0187 1 0.8912
anova(m0, m2, test="Chisq") # Adding B does not improve fit; null model the best model
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2208.1 2216.9 -1101 2202.1
## m2 4 2210.1 2221.8 -1101 2202.1 0.0304 1 0.8616
best.fit <- glm(distance ~ 1, family = Gamma(link = "log"), data = d_T2)
tidy_regression(best.fit, is_color=output_col)
## glm distance ~ 1 Gamma(link = "log") d_T2
## AIC: 2204.082
## 1 coeff: 7.7754958 Pr(>|t|): 1.47395e-96 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
Morphology
d_T2 <- d_T2 %>%
filter(!is.na(body))
d_T2 <- center_data(d_T2)
data<-data.frame(R=d_T2$distance, A=d_T2$body_c, B=d_T2$wing_c, C=d_T2$thorax_c, X=d_T2$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 2208.082 2209.023 2209.101 2209.311 2210.239 2210.784
## models 18 2 3 1 10 4
## probs 0.1981216 0.1237692 0.1190463 0.1071628 0.06738853 0.05130382
##
## m0 R ~ 1 + (1 | X)
## m2 R ~ B + (1 | X)
## m3 R ~ C + (1 | X)
## m1 R ~ A + (1 | X)
## m10 R ~ B * C + (1 | X)
## m4 R ~ A + B + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 0
anova(m0, m2, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2208.1 2216.9 -1101.0 2202.1
## m2 4 2209.0 2220.8 -1100.5 2201.0 1.0591 1 0.3034
anova(m0, m3, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2208.1 2216.9 -1101.0 2202.1
## m3 4 2209.1 2220.8 -1100.5 2201.1 0.9813 1 0.3219
best.fit <- glm(distance ~ 1, family = Gamma(link = "log"), data = d_T2)
tidy_regression(best.fit, is_color=output_col)
## glm distance ~ 1 Gamma(link = "log") d_T2
## AIC: 2204.082
## 1 coeff: 7.7754958 Pr(>|t|): 1.47395e-96 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
data<-data.frame(R=d_T2$distance, A=d_T2$wing2body, B=d_T2$thorax_c, X=d_T2$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 2-FF log link.R"))
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## [,1] [,2] [,3] [,4] [,5]
## AICs 2208.082 2209.101 2209.766 2210.406 2211.52
## models 5 2 1 3 4
## probs 0.3962349 0.2380877 0.1706826 0.1239728 0.0710221
##
## m0 R ~ 1 + (1 | X)
## m2 R ~ B + (1 | X)
## m1 R ~ A + (1 | X)
## m3 R ~ A + B + (1 | X)
## m4 R ~ A * B + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 0
anova(m0, m2, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2208.1 2216.9 -1101.0 2202.1
## m2 4 2209.1 2220.8 -1100.5 2201.1 0.9813 1 0.3219
anova(m0, m3, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ A + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 2208.1 2216.9 -1101.0 2202.1
## m3 5 2210.4 2225.1 -1100.2 2200.4 1.6761 2 0.4326
best.fit <- glm(distance ~ 1, family = Gamma(link = "log"), data = d_T2)
tidy_regression(best.fit, is_color=output_col)
## glm distance ~ 1 Gamma(link = "log") d_T2
## AIC: 2204.082
## 1 coeff: 7.7754958 Pr(>|t|): 1.47395e-96 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
h1 <- as.grob(expression(
hist(d_T2$distance)))
p1 <- as.grob(expression(
plot(distance ~ mass_c, data=d_T2)))
p2 <- as.grob(expression(
plot(distance ~ days_from_start_c, data=d_T2)))
p3 <- as.grob(expression(
plot(distance ~ min_from_IncStart, data=d_T2)))
p4 <- as.grob(expression(
plot(distance ~ wing_c, data=d_T2)))
p5 <- as.grob(expression(
plot(distance ~ beak_c, data=d_T2)))
p6 <- as.grob(expression(
plot(distance ~ thorax_c, data=d_T2)))
p7 <- as.grob(expression(
plot(distance ~ body_c, data=d_T2)))
grid.arrange(h1,p1,p2,p3,p4,p5,p6,p7, ncol=3)
gf_point(distance~sym_dist, col=~host_plant, alpha=~sex_c, data=d_T2)
summary <- aggregate(distance~sym_dist*sex*host_plant, data=d_T2, FUN=mean)
plot(summary$distance~summary$sym_dist,
col=c(1,2)[as.factor(summary$sex)], # Female = Black, Male = Red
pch=c(19,22)[as.factor(summary$host_plant)], # Filled circle is GRT, Open square is BV
main="T2 Observed Data: distance ~ sex*host_plant*sym_dist",
xlab = "Distance from Sympatric Zone (°)",
ylab= "Distance Flew",
#sub=eq_glmer
)
legend("topright",
legend = c("F and K.elegans", "M and C.corindum", "F and C.corindum","M and K.elegans"),
#inset=c(-0.27,0.2),
col= 1:2,
pch = c(0,16,19),
title="Groups")
Testing link functions
model_test<-glm(distance~chamber, data=d_fem, family=Gamma(link="log"))
## Warning: glm.fit: algorithm did not converge
summary(model_test)
##
## Call:
## glm(formula = distance ~ chamber, family = Gamma(link = "log"),
## data = d_fem)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.775 -2.748 -1.325 0.358 2.769
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1371 0.8483 4.877 7.64e-06 ***
## chamberA-2 4.2412 0.9795 4.330 5.45e-05 ***
## chamberA-3 3.6666 0.9947 3.686 0.000476 ***
## chamberA-4 3.2755 0.9411 3.481 0.000914 ***
## chamberB-1 4.8294 0.9795 4.931 6.28e-06 ***
## chamberB-2 9.1827 0.9947 9.232 2.58e-13 ***
## chamberB-3 4.0452 0.9947 4.067 0.000135 ***
## chamberB-4 3.1403 0.9411 3.337 0.001424 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 2.158639)
##
## Null deviance: 334.04 on 70 degrees of freedom
## Residual deviance: 364.18 on 63 degrees of freedom
## AIC: 1190
##
## Number of Fisher Scoring iterations: 25
plot(model_test)
Testing covariates
####### Effect of chamber A-1, B-2, and marginal effect of B-4 (also algorithm did not converge)
d_fem$chamber <- relevel(d_fem$chamber, ref="A-2")
tidy_regression(glm(distance~chamber, data=d_fem, family=Gamma(link="log")), is_color=output_col)
## Warning: glm.fit: algorithm did not converge
## glm distance ~ chamber Gamma(link = "log") d_fem
## AIC: 1189.956
## (Intercept) coeff: 8.3783002 Pr(>|t|): 2.305649e-25 *
## chamberA-1 coeff: -4.2412064 Pr(>|t|): 5.446826e-05 *
## chamberA-3 coeff: -0.5745623 Pr(>|t|): 0.4239609
## chamberA-4 coeff: -0.9656697 Pr(>|t|): 0.1345908
## chamberB-1 coeff: 0.5882037 Pr(>|t|): 0.3989487
## chamberB-2 coeff: 4.9414892 Pr(>|t|): 2.736347e-09 *
## chamberB-3 coeff: -0.1960001 Pr(>|t|): 0.7845667
## chamberB-4 coeff: -1.1008694 Pr(>|t|): 0.08889911 .
####### No effect of test date
tidy_regression(glm(distance~days_from_start, data=d_fem, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ days_from_start Gamma(link = "log") d_fem
## AIC: 1167.3
## (Intercept) coeff: 8.3845521 Pr(>|t|): 2.592199e-37 *
## days_from_start coeff: -0.0368596 Pr(>|t|): 0.1572935
####### Marginal effect of Minutes from IncStart time
tidy_regression(glm(distance~min_from_IncStart, data=d_fem, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ min_from_IncStart Gamma(link = "log") d_fem
## AIC: 1167.291
## (Intercept) coeff: 8.4058366 Pr(>|t|): 9.627332e-40 *
## min_from_IncStart coeff: -0.0022066 Pr(>|t|): 0.08940462 .
#### No effect of eggs laid
tidy_regression(glm(distance~eggs_b, data=d_fem, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ eggs_b Gamma(link = "log") d_fem
## AIC: 1168.935
## (Intercept) coeff: 8.0201812 Pr(>|t|): 1.0538e-46 *
## eggs_b coeff: 0.0652519 Pr(>|t|): 0.8698485
### No effect of total eggs laid
tidy_regression(glm(distance~total_eggs, data=d_fem, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ total_eggs Gamma(link = "log") d_fem
## AIC: 887.4532
## (Intercept) coeff: 8.2954087 Pr(>|t|): 2.489175e-36 *
## total_eggs coeff: -0.0020464 Pr(>|t|): 0.4708562
Without Mass
data<-data.frame(R=d_fem$distance, A=d_fem$host_c, B=d_fem$sym_dist, X=d_fem$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 2-FF log link.R"))
## [,1] [,2] [,3] [,4]
## AICs 1170.955 1172.908 1172.927 1174.739
## models 5 1 2 3
## probs 0.5111442 0.1925138 0.1906746 0.07705344
##
## m0 R ~ 1 + (1 | X)
## m1 R ~ A + (1 | X)
## m2 R ~ B + (1 | X)
## m3 R ~ A + B + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 0
anova(m0, m2, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 1171.0 1177.7 -582.48 1165.0
## m2 4 1172.9 1182.0 -582.46 1164.9 0.0278 1 0.8675
anova(m0, m1, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 1171.0 1177.7 -582.48 1165.0
## m1 4 1172.9 1182.0 -582.45 1164.9 0.047 1 0.8283
anova(m0, m3, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ A + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 1171.0 1177.7 -582.48 1165.0
## m3 5 1174.7 1186.0 -582.37 1164.7 0.2157 2 0.8978
best.fit <- glm(distance ~ 1, family = Gamma(link = "log"), data = d_fem)
tidy_regression(best.fit, is_color=output_col)
## glm distance ~ 1 Gamma(link = "log") d_fem
## AIC: 1166.955
## 1 coeff: 8.0408591 Pr(>|t|): 6.160243e-53 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
With Mass
d_fem <- d_fem %>%
filter(!is.na(mass))
d_fem <- center_data(d_fem)
data<-data.frame(R=d_fem$distance, A=d_fem$host_c, B=d_fem$sym_dist, C=d_fem$mass_c, X=d_fem$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 1156.411 1157.122 1158.344 1158.391 1158.961 1159.121
## models 18 3 1 2 5 6
## probs 0.2567159 0.1798691 0.09761403 0.09534921 0.07171886 0.0662067
## [,7]
## AICs 1159.397
## models 9
## probs 0.05767738
##
## m0 R ~ 1 + (1 | X)
## m3 R ~ C + (1 | X)
## m1 R ~ A + (1 | X)
## m2 R ~ B + (1 | X)
## m5 R ~ A + C + (1 | X)
## m6 R ~ B + C + (1 | X)
## m9 R ~ A * C + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 0
anova(m0, m3, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 1156.4 1163.2 -575.21 1150.4
## m3 4 1157.1 1166.1 -574.56 1149.1 1.2885 1 0.2563
anova(m3, m6, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m3: R ~ C + (1 | X)
## m6: R ~ B + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m3 4 1157.1 1166.1 -574.56 1149.1
## m6 5 1159.1 1170.4 -574.56 1149.1 0.0011 1 0.9735
anova(m3, m5, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m3: R ~ C + (1 | X)
## m5: R ~ A + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m3 4 1157.1 1166.1 -574.56 1149.1
## m5 5 1159.0 1170.2 -574.48 1149.0 0.161 1 0.6882
best.fit <- glm(distance ~ 1, family = Gamma(link = "log"), data = d_fem)
tidy_regression(best.fit, is_color=output_col)
## glm distance ~ 1 Gamma(link = "log") d_fem
## AIC: 1152.411
## 1 coeff: 8.0544993 Pr(>|t|): 1.593179e-52 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
Min From Inc Start
data<-data.frame(R=d_fem$distance, A=d_fem$min_from_IncStart_s, B=d_fem$sym_dist_s, C=d_fem$mass_s, X=d_fem$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 1156.411 1157.105 1157.122 1157.944 1158.391 1159.097 1159.121
## models 18 1 3 5 2 4 6
## probs 0.217025 0.153332 0.1520595 0.1008294 0.08060724 0.05663251 0.05597046
##
## m0 R ~ 1 + (1 | X)
## m1 R ~ A + (1 | X)
## m3 R ~ C + (1 | X)
## m5 R ~ A + C + (1 | X)
## m2 R ~ B + (1 | X)
## m4 R ~ A + B + (1 | X)
## m6 R ~ B + C + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 0
anova(m0, m1, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 1156.4 1163.2 -575.21 1150.4
## m1 4 1157.1 1166.1 -574.55 1149.1 1.3052 1 0.2533
anova(m0, m3, test="Chisq") # Adding C does not impove fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 1156.4 1163.2 -575.21 1150.4
## m3 4 1157.1 1166.1 -574.56 1149.1 1.2885 1 0.2563
anova(m0, m2, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 1156.4 1163.2 -575.21 1150.4
## m2 4 1158.4 1167.4 -575.20 1150.4 0.0192 1 0.8899
best.fit <- glm(distance ~ 1, family = Gamma(link = "log"), data = d_fem)
tidy_regression(best.fit, is_color=output_col)
## glm distance ~ 1 Gamma(link = "log") d_fem
## AIC: 1152.411
## 1 coeff: 8.0544993 Pr(>|t|): 1.593179e-52 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
Morphology
d_fem <- d_fem %>%
filter(!is.na(mass))
d_fem <- center_data(d_fem)
data<-data.frame(R=d_fem$distance, A=d_fem$body_c, B=d_fem$wing_c, C=d_fem$thorax_c, X=d_fem$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 1146.469 1147.011 1147.097 1147.313 1147.94 1148.59 1148.604
## models 16 11 13 10 17 9 15
## probs 0.1997698 0.1523673 0.1459453 0.1310071 0.09576182 0.06918828 0.06870863
## [,8] [,9]
## AICs 1148.949 1149.01
## models 12 14
## probs 0.05781565 0.05608072
##
## m16 R ~ A * C + B * C + (1 | X)
## m11 R ~ A * B + C + (1 | X)
## m13 R ~ B * C + A + (1 | X)
## m10 R ~ B * C + (1 | X)
## m17 R ~ A * B + A * C + B * C + (1 | X)
## m9 R ~ A * C + (1 | X)
## m15 R ~ A * B + B * C + (1 | X)
## m12 R ~ A * C + B + (1 | X)
## m14 R ~ A * B + A * C + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 1
anova(m13, m16, test="Chisq") # Adding A*C does not improve fit
## Data: data
## Models:
## m13: R ~ B * C + A + (1 | X)
## m16: R ~ A * C + B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m13 7 1147.1 1162.8 -566.55 1133.1
## m16 8 1146.5 1164.5 -565.23 1130.5 2.6279 1 0.105
anova(m15, m16, test="Chisq") # Adding B*C does improve fit
## Data: data
## Models:
## m15: R ~ A * B + B * C + (1 | X)
## m16: R ~ A * C + B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m15 8 1148.6 1166.6 -566.30 1132.6
## m16 8 1146.5 1164.5 -565.23 1130.5 2.1346 0 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
best.fit <- glmer(distance ~ wing_c*thorax_c + body_c + (1|chamber), family = Gamma(link = "log"), data = d_fem)
## boundary (singular) fit: see ?isSingular
tidy_regression(best.fit, is_color=output_col)
## glmer distance ~ wing_c * thorax_c + body_c + (1 | chamber) d_fem Gamma(link = "log")
## AIC: 1147.097 1147.097
## (Intercept) coeff: 8.3502545 Pr(>|t|): 9.567285e-273 *
## wing_c coeff: 2.4752182 Pr(>|t|): 0.133795
## thorax_c coeff: 5.2488674 Pr(>|t|): 0.006770463 *
## body_c coeff: -2.3662178 Pr(>|t|): 0.1237028
## wing_c:thorax_c coeff: -4.1743786 Pr(>|t|): 1.41786e-07 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
d_fem <- d_fem %>%
filter(!is.na(mass))
d_fem <- center_data(d_fem)
data<-data.frame(R=d_fem$distance, A=d_fem$wing2body, B=d_fem$thorax_c, X=d_fem$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 2-FF log link.R"))
## [,1]
## AICs 1147.522
## models 4
## probs 0.9400417
##
## m4 R ~ A * B + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 0
best.fit <- glmer(distance ~ wing2body*thorax_c + (1|chamber), family = Gamma(link = "log"), data = d_fem)
## boundary (singular) fit: see ?isSingular
tidy_regression(best.fit, is_color=output_col) # the coeffients are crazy large
## glmer distance ~ wing2body * thorax_c + (1 | chamber) d_fem Gamma(link = "log")
## AIC: 1147.522 1147.522
## (Intercept) coeff: -4.5550138 Pr(>|t|): 0.7621587
## wing2body coeff: 17.6741654 Pr(>|t|): 0.4011636
## thorax_c coeff: 237.6687481 Pr(>|t|): 4.936288e-07 *
## wing2body:thorax_c coeff: -327.3587622 Pr(>|t|): 6.068854e-07 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
Eggs
d_fem <- d_fem %>%
filter(!is.na(mass))
d_fem <- center_data(d_fem)
data<-data.frame(R=d_fem$distance, A=d_fem$eggs_b, B=d_fem$mass_c, C=d_fem$thorax_c, X=d_fem$chamber) # adding D=d_fem$wing2body_c leads to a massive converging error
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4]
## AICs 1144.766 1145.28 1145.762 1146.565
## models 16 13 15 17
## probs 0.3414776 0.2640009 0.20753 0.1388907
##
## m16 R ~ A * C + B * C + (1 | X)
## m13 R ~ B * C + A + (1 | X)
## m15 R ~ A * B + B * C + (1 | X)
## m17 R ~ A * B + A * C + B * C + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 0
anova(m13, m16, test="Chisq") # Adding A*C does not improve fit
## Data: data
## Models:
## m13: R ~ B * C + A + (1 | X)
## m16: R ~ A * C + B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m13 7 1145.3 1161.0 -565.64 1131.3
## m16 8 1144.8 1162.8 -564.38 1128.8 2.5147 1 0.1128
best.fit <- glmer(distance ~ mass_c * thorax_c + eggs_b + (1|chamber), family = Gamma(link = "log"), data = d_fem)
## boundary (singular) fit: see ?isSingular
tidy_regression(best.fit, is_color=output_col) # the coeffients are crazy large
## glmer distance ~ mass_c * thorax_c + eggs_b + (1 | chamber) d_fem Gamma(link = "log")
## AIC: 1145.28 1145.28
## (Intercept) coeff: 7.6864677 Pr(>|t|): 3.64821e-175 *
## mass_c coeff: -37.7874322 Pr(>|t|): 0.005697995 *
## thorax_c coeff: 4.6818129 Pr(>|t|): 6.693082e-05 *
## eggs_b coeff: 1.4369808 Pr(>|t|): 0.01810517 *
## mass_c:thorax_c coeff: -220.0590346 Pr(>|t|): 0.0004688367 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
h1 <- as.grob(expression(
hist(d_fem$distance)))
p1 <- as.grob(expression(
plot(distance ~ mass_c, data=d_fem)))
p2 <- as.grob(expression(
plot(distance ~ days_from_start_c, data=d_fem)))
p3 <- as.grob(expression(
plot(distance ~ min_from_IncStart, data=d_fem)))
p4 <- as.grob(expression(
plot(distance ~ wing_c, data=d_fem)))
p5 <- as.grob(expression(
plot(distance ~ beak_c, data=d_fem)))
p6 <- as.grob(expression(
plot(distance ~ thorax_c, data=d_fem)))
p7 <- as.grob(expression(
plot(distance ~ body_c, data=d_fem)))
p8 <- as.grob(expression(
plot(distance ~ eggs_b, data=d_fem)))
grid.arrange(h1,p1,p2,p3,p4,p5,p6,p7,p8, ncol=3)
gf_point(distance~sym_dist, col=~host_plant, alpha=~sex_c, data=d_fem)
summary <- aggregate(distance~sym_dist*sex*host_plant, data=d_fem, FUN=mean)
plot(summary$distance~summary$sym_dist,
#col=c(1,2)[as.factor(summary$sex)], # Female = Black, Male = Red
pch=c(19,22)[as.factor(summary$host_plant)], # Filled circle is GRT, Open square is BV
main="Female Observed Data: distance ~ sex*host_plant*sym_dist",
xlab = "Distance from Sympatric Zone (°)",
ylab= "Distance Flew",
#sub=eq_glmer
)
legend("topright",
legend = c("F and K.elegans","F and C.corindum"),
#inset=c(-0.27,0.2),
col= 1,
pch = c(0,16,19),
title="Groups")
Testing link functions
model_test<-glm(distance~chamber, data=d_male, family=Gamma(link="log"))
summary(model_test)
##
## Call:
## glm(formula = distance ~ chamber, family = Gamma(link = "log"),
## data = d_male)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.7686 -2.8238 -1.2142 0.1598 3.6257
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.372380 0.336174 24.905 < 2e-16 ***
## chamberA-2 -0.528857 0.448670 -1.179 0.239595
## chamberA-3 -0.475296 0.430648 -1.104 0.270762
## chamberA-4 -1.537467 0.417652 -3.681 0.000283 ***
## chamberB-1 0.009289 0.480349 0.019 0.984586
## chamberB-2 -0.910431 0.422751 -2.154 0.032199 *
## chamberB-3 -0.855647 0.455182 -1.880 0.061264 .
## chamberB-4 -0.736305 0.466535 -1.578 0.115736
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 2.825331)
##
## Null deviance: 1260.8 on 265 degrees of freedom
## Residual deviance: 1201.5 on 258 degrees of freedom
## AIC: 4204.3
##
## Number of Fisher Scoring iterations: 19
plot(model_test)
Testing covariates
####### Effect of chamber A-4
d_male$chamber <- relevel(d_male$chamber, ref="A-2")
tidy_regression(glm(distance~chamber, data=d_male, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ chamber Gamma(link = "log") d_male
## AIC: 4204.251
## (Intercept) coeff: 7.8435232 Pr(>|t|): 2.845936e-75 *
## chamberA-1 coeff: 0.5288572 Pr(>|t|): 0.239595
## chamberA-3 coeff: 0.0535614 Pr(>|t|): 0.8938257
## chamberA-4 coeff: -1.0086102 Pr(>|t|): 0.009673145 *
## chamberB-1 coeff: 0.5381463 Pr(>|t|): 0.2368559
## chamberB-2 coeff: -0.3815741 Pr(>|t|): 0.331787
## chamberB-3 coeff: -0.3267894 Pr(>|t|): 0.4449586
## chamberB-4 coeff: -0.2074475 Pr(>|t|): 0.6371224
####### No effect of test date
tidy_regression(glm(distance~days_from_start, data=d_male, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ days_from_start Gamma(link = "log") d_male
## AIC: 4210.209
## (Intercept) coeff: 7.618847 Pr(>|t|): 3.945263e-115 *
## days_from_start coeff: 0.0133765 Pr(>|t|): 0.3637083
####### Effect of Minutes from IncStart time
tidy_regression(glm(distance~min_from_IncStart, data=d_male, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ min_from_IncStart Gamma(link = "log") d_male
## AIC: 4207.366
## (Intercept) coeff: 8.0297648 Pr(>|t|): 5.940797e-133 *
## min_from_IncStart coeff: -0.0015705 Pr(>|t|): 0.03088531 *
#### No effect of eggs laid
tidy_regression(glm(distance~eggs_b, data=d_male, family=Gamma(link="log")), is_color=output_col)
## glm distance ~ eggs_b Gamma(link = "log") d_male
## AIC: 4208.864
## 1 coeff: 7.7669607 Pr(>|t|): 6.735041e-182 *
Without Mass
data<-data.frame(R=d_male$distance, A=d_male$host_c, B=d_male$sym_dist, X=d_male$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 2-FF log link.R"))
## [,1] [,2] [,3] [,4]
## AICs 4217.296 4218.924 4219.24 4220.453
## models 5 1 2 3
## probs 0.4740966 0.2100251 0.1793261 0.0977906
##
## m0 R ~ 1 + (1 | X)
## m1 R ~ A + (1 | X)
## m2 R ~ B + (1 | X)
## m3 R ~ A + B + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 1
anova(m0, m2, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 4217.3 4228.0 -2105.7 4211.3
## m2 4 4219.2 4233.6 -2105.6 4211.2 0.0556 1 0.8136
anova(m0, m1, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 4217.3 4228.0 -2105.7 4211.3
## m1 4 4218.9 4233.3 -2105.5 4210.9 0.3716 1 0.5421
anova(m0, m3, test="Chisq") # Adding C does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ A + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 4217.3 4228.0 -2105.7 4211.3
## m3 5 4220.5 4238.4 -2105.2 4210.5 0.8428 2 0.6561
best.fit <- glm(distance ~ 1, family = Gamma(link = "log"), data = d_male)
tidy_regression(best.fit, is_color=output_col)
## glm distance ~ 1 Gamma(link = "log") d_male
## AIC: 4208.864
## 1 coeff: 7.7669607 Pr(>|t|): 6.735041e-182 *
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
With Mass
d_male <- d_male %>%
filter(!is.na(mass))
d_male <- center_data(d_male)
data<-data.frame(R=d_male$distance, A=d_male$host_c, B=d_male$sym_dist, C=d_male$mass_c, X=d_male$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 4215.425 4216.027 4216.943 4217.296 4217.482 4217.736
## models 3 5 6 18 7 9
## probs 0.2191577 0.1622013 0.1026281 0.08601288 0.07836854 0.0690146
##
## m3 R ~ C + (1 | X)
## m5 R ~ A + C + (1 | X)
## m6 R ~ B + C + (1 | X)
## m0 R ~ 1 + (1 | X)
## m7 R ~ A + B + C + (1 | X)
## m9 R ~ A * C + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 4
anova(m0, m3, test="Chisq") # Adding C improves fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 4217.3 4228.0 -2105.7 4211.3
## m3 4 4215.4 4229.8 -2103.7 4207.4 3.8706 1 0.04914 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m3, m5, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m3: R ~ C + (1 | X)
## m5: R ~ A + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m3 4 4215.4 4229.8 -2103.7 4207.4
## m5 5 4216.0 4233.9 -2103.0 4206.0 1.3981 1 0.237
best.fit <- glm(distance ~ mass_c, family = Gamma(link = "log"), data = d_male)
tidy_regression(best.fit, is_color=output_col)
## glm distance ~ mass_c Gamma(link = "log") d_male
## AIC: 4208.523
## (Intercept) coeff: 7.7527125 Pr(>|t|): 5.624763e-180 *
## mass_c coeff: 26.9125719 Pr(>|t|): 0.08121491 .
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
Min From Inc Start
data<-data.frame(R=d_male$distance, A=d_male$min_from_IncStart_s, B=d_male$sym_dist_s, C=d_male$mass_s, X=d_male$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 4214.843 4214.848 4215.425 4216.699 4216.757 4216.769 4216.943
## models 5 9 3 7 1 12 6
## probs 0.1626666 0.1622848 0.1215637 0.06430577 0.06246035 0.0621024 0.05692638
## [,8]
## AICs 4216.947
## models 11
## probs 0.05680757
##
## m5 R ~ A + C + (1 | X)
## m9 R ~ A * C + (1 | X)
## m3 R ~ C + (1 | X)
## m7 R ~ A + B + C + (1 | X)
## m1 R ~ A + (1 | X)
## m12 R ~ A * C + B + (1 | X)
## m6 R ~ B + C + (1 | X)
## m11 R ~ A * B + C + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 0
anova(m5, m9, test="Chisq") # adding A*C does not improve fit
## Data: data
## Models:
## m5: R ~ A + C + (1 | X)
## m9: R ~ A * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m5 5 4214.8 4232.8 -2102.4 4204.8
## m9 6 4214.8 4236.3 -2101.4 4202.8 1.9953 1 0.1578
anova(m3, m5, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m3: R ~ C + (1 | X)
## m5: R ~ A + C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m3 4 4215.4 4229.8 -2103.7 4207.4
## m5 5 4214.8 4232.8 -2102.4 4204.8 2.5825 1 0.108
anova(m0, m1, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 4217.3 4228.0 -2105.7 4211.3
## m1 4 4216.8 4231.1 -2104.4 4208.8 2.5388 1 0.1111
anova(m0, m3, test="Chisq") # Adding C improves fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m3: R ~ C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 4217.3 4228.0 -2105.7 4211.3
## m3 4 4215.4 4229.8 -2103.7 4207.4 3.8706 1 0.04914 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Do we go with anova or AIC? I went with anova step-wise comparisons
best.fit <- glmer(distance ~ mass_s + (1|chamber), family = Gamma(link = "log"), data = d_fem)
## boundary (singular) fit: see ?isSingular
tidy_regression(best.fit, is_color=output_col)
## glmer distance ~ mass_s + (1 | chamber) d_fem Gamma(link = "log")
## AIC: 1157.122 1157.122
## (Intercept) coeff: 8.0249416 Pr(>|t|): 0 *
## mass_s coeff: -0.2856174 Pr(>|t|): 0.240334
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
Morphology
d_male <- d_male %>%
filter(!is.na(body))
d_male <- center_data(d_male)
data<-data.frame(R=d_male$distance, A=d_male$body_c, B=d_male$wing_c, C=d_male$thorax_c, X=d_male$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs 4208.922 4209.268 4210.413 4210.943 4210.981 4211.369
## models 9 10 12 1 13 2
## probs 0.2092432 0.1759886 0.09929567 0.07617022 0.07473462 0.06155006
##
## m9 R ~ A * C + (1 | X)
## m10 R ~ B * C + (1 | X)
## m12 R ~ A * C + B + (1 | X)
## m1 R ~ A + (1 | X)
## m13 R ~ B * C + A + (1 | X)
## m2 R ~ B + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 0
anova(m9, m12, test="Chisq") # Adding B does not improve fit
## Data: data
## Models:
## m9: R ~ A * C + (1 | X)
## m12: R ~ A * C + B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m9 6 4208.9 4230.4 -2098.5 4196.9
## m12 7 4210.4 4235.5 -2098.2 4196.4 0.5092 1 0.4755
best.fit <- glmer(distance ~ body_c*thorax_c + (1|chamber), family = Gamma(link = "log"), data = d_male)
tidy_regression(best.fit, is_color=output_col)
## glmer distance ~ body_c * thorax_c + (1 | chamber) d_male Gamma(link = "log")
## AIC: 4208.922 4208.922
## (Intercept) coeff: 7.8807005 Pr(>|t|): 0 *
## body_c coeff: 0.7235282 Pr(>|t|): 0.03223517 *
## thorax_c coeff: -1.0900972 Pr(>|t|): 0.3426866
## body_c:thorax_c coeff: -2.4635727 Pr(>|t|): 0.01603995 *
positive effect of body, where the longer the body the more likely the bug will disperse farther
negative effect of thorax where the wider the thorax, the less likely the bug will disperse farther
negative effect of body*thorax where the wider the thorax and longer the body, the less likely the bug will disperse farther distances
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
data<-data.frame(R=d_male$distance, A=d_male$wing2body, B=d_male$thorax_c, X=d_male$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 2-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5]
## AICs 4216.041 4217.296 4217.733 4218.403 4219.606
## models 2 5 3 1 4
## probs 0.4101434 0.2190038 0.1759692 0.1258788 0.06900482
##
## m2 R ~ B + (1 | X)
## m0 R ~ 1 + (1 | X)
## m3 R ~ A + B + (1 | X)
## m1 R ~ A + (1 | X)
## m4 R ~ A * B + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 1
anova(m0, m2, test="Chisq") # Adding B marginally improves fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m2: R ~ B + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 4217.3 4228.0 -2105.7 4211.3
## m2 4 4216.0 4230.4 -2104.0 4208.0 3.2548 1 0.07121 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m0, m1, test="Chisq") # Adding A does not improve fit
## Data: data
## Models:
## m0: R ~ 1 + (1 | X)
## m1: R ~ A + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m0 3 4217.3 4228.0 -2105.7 4211.3
## m1 4 4218.4 4232.7 -2105.2 4210.4 0.8925 1 0.3448
best.fit <- glmer(distance ~ thorax_c + (1|chamber), family = Gamma(link = "log"), data = d_male)
tidy_regression(best.fit, is_color=output_col)
## glmer distance ~ thorax_c + (1 | chamber) d_male Gamma(link = "log")
## AIC: 4216.041 4216.041
## (Intercept) coeff: 7.7069979 Pr(>|t|): 0 *
## thorax_c coeff: 1.307645 Pr(>|t|): 0.06927084 .
*marginal positive effect of thorax where the wider the thorax the more likely the bug will disperse far.
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
data<-data.frame(R=d_male$distance, A=d_male$wing2body_s, B=d_male$thorax_s, C=d_male$mass_s, X=d_male$chamber)
source("src/compare_models.R")
errors <- withWarnings(model_comparisonsAIC("src/generic models-Gamma glmer 3-FF log link.R"))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 4215.425 4215.553 4216.041 4216.725 4216.899 4217.206 4217.296
## models 3 10 2 6 5 9 18
## probs 0.153712 0.1441791 0.1129792 0.0802498 0.07356673 0.06310452 0.06032738
## [,8]
## AICs 4217.473
## models 13
## probs 0.05522882
##
## m3 R ~ C + (1 | X)
## m10 R ~ B * C + (1 | X)
## m2 R ~ B + (1 | X)
## m6 R ~ B + C + (1 | X)
## m5 R ~ A + C + (1 | X)
## m9 R ~ A * C + (1 | X)
## m0 R ~ 1 + (1 | X)
## m13 R ~ B * C + A + (1 | X)
cat("Number of models that failed to converge:", length(errors$warnings))
## Number of models that failed to converge: 1
anova(m6, m10, test="Chisq") # Adding B*C marginally improves fit
## Data: data
## Models:
## m6: R ~ B + C + (1 | X)
## m10: R ~ B * C + (1 | X)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m6 5 4216.7 4234.6 -2103.4 4206.7
## m10 6 4215.6 4237.1 -2101.8 4203.6 3.1718 1 0.07492 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
best.fit <- glmer(distance ~ mass_s * thorax_s + (1|chamber), family = Gamma(link = "log"), data = d_male)
tidy_regression(best.fit, is_color=output_col)
## glmer distance ~ mass_s * thorax_s + (1 | chamber) d_male Gamma(link = "log")
## AIC: 4215.553 4215.553
## (Intercept) coeff: 7.818271 Pr(>|t|): 0 *
## mass_s coeff: 0.1917155 Pr(>|t|): 0.1894174
## thorax_s coeff: 0.0820825 Pr(>|t|): 0.61803
## mass_s:thorax_s coeff: -0.2161038 Pr(>|t|): 0.06425545 .
s.test <- paste("pval: ", shapiro.test(residuals(best.fit))$p.value)
qqnorm(resid(best.fit))
qqline(resid(best.fit))
text(-1, 0.1, s.test)
h1 <- as.grob(expression(
hist(d_male$distance)))
p1 <- as.grob(expression(
plot(distance ~ mass_c, data=d_male)))
p2 <- as.grob(expression(
plot(distance ~ days_from_start_c, data=d_male)))
p3 <- as.grob(expression(
plot(distance ~ min_from_IncStart, data=d_male)))
p4 <- as.grob(expression(
plot(distance ~ wing_c, data=d_male)))
p5 <- as.grob(expression(
plot(distance ~ beak_c, data=d_male)))
p6 <- as.grob(expression(
plot(distance ~ thorax_c, data=d_male)))
p7 <- as.grob(expression(
plot(distance ~ body_c, data=d_male)))
grid.arrange(h1,p1,p2,p3,p4,p5,p6,p7, ncol=3)
gf_point(distance~sym_dist, col=~host_plant, alpha=~sex_c, data=d_male)
summary <- aggregate(distance~sym_dist*sex*host_plant, data=d_male, FUN=mean)
plot(summary$distance~summary$sym_dist,
#col=c(1,2)[as.factor(summary$sex)], # Female = Black, Male = Red
pch=c(19,22)[as.factor(summary$host_plant)], # Filled circle is GRT, Open square is BV
main="Male Observed Data: distance ~ sex*host_plant*sym_dist",
xlab = "Distance from Sympatric Zone (°)",
ylab= "Distance Flew",
#sub=eq_glmer
)
legend("topright",
legend = c("M and K.elegans","M and C.corindum"),
#inset=c(-0.27,0.2),
col= 1,
pch = c(0,16,19),
title="Groups")