#knitr::purl("wing_stats.Rmd", output = "wing_stats.R") # convert Rmd to R script
Base statistics of all morphology for wing morph and wing2body ratio (in long-winged bugs).
Conclusion from the wing_timeseries.Rmd: Found no significant temporal dependencies. Determined that most time series were nonstationary and all ts were an AR(0) process [white noise with moving average]. Concluded that the time values were independent of each other.
source_path = "~/Desktop/git_repositories/SBB-dispersal/avbernat/Rsrc/"
script_names = c("compare_models.R",
"regression_output.R",
"clean_morph_data.R", # two functions: read_morph_data and remove_torn_wings
"AICprobabilities.R",
"remove_torn_wings.R",
"get_Akaike_weights.R")
for (script in script_names) {
path = paste0(source_path, script)
source(path)
}
source("~/Desktop/git_repositories/SBB-dispersal/avbernat/RSsrc/spatial_dependencies.R") # space
data_list <- read_morph_data("data/allmorphology05.10.21-coors.csv")
## morph types: L S NA LS L/S SL
## recoding missing morph types...
## S if wing2thorax <=2.2, L if wing2thorax >=2.5
##
## ambiguous wing morph bug count: 32
raw_data = data_list[[1]]
data_long = data_list[[2]]
dotplot = function(var, data) {
plot(data[,var], seq(1:nrow(data)),
col=data$color, ylab="Order of the data from text file", xlab=paste0("Value of ", var))
}
# remove NA dates
d = data_long %>%
filter(!is.na(wing2body))
range(d$wing2body) # 0.5773723 0.8472103
## [1] 0.6623794 0.8472103
MyVar <- c("wing2body", "wing2thorax", "body", "wing", "beak", "thorax", "lat", "long")
d$color = "red"
d$color[d$year == "2019" & d$month =="May"] = "black"
par(mfrow=c(3,3))
for (v in MyVar) {
dotplot(v, d)
}
# remove outliers?
d2 = d %>%
filter(!beak<4.5) %>%
filter(!wing2body <0.62) #%>%
#filter(!body < 8) %>%
#filter(!wing2body > 0.79)
d3 = remove_torn_wings(d2)
##
## number of bugs with torn wings: 193
par(mfrow=c(3,3))
for (v in MyVar) {
dotplot(v, d3)
}
Wing overmeasured? Body measured too small? Probably the body measured off? wing seems off.
raw_data_missing = raw_data %>%
filter(w_morph=="" | is.na(w_morph)) # had 30 that are hard to identify as either S or L based on the wing2thorax thresholds.
head(raw_data_missing) # need to determine the morph of these bugs manually. In the long term...need to do this manually because the boundaries of what is defined as a morph can change over time.
## pophost population sex beak thorax wing body month year
## 1 K. elegans Gainesville M 5.65 2.80 6.36 NA April 2014.0
## 2 K. elegans Gainesville M 5.62 2.79 NA NA April 2014.0
## 3 K. elegans Leesburg M 5.06 2.63 6.14 NA May 2013.0
## 4 K. elegans Leesburg F 7.54 3.79 8.68 NA December 2013.6
## 5 K. elegans Leesburg F 7.50 3.04 6.86 NA December 2013.6
## 6 K. elegans Leesburg F 6.26 3.32 7.47 NA December 2013.6
## months_since_start season w_morph lat long diapause
## 1 11 spring 29.66374 -82.36091
## 2 11 spring <NA> 29.66374 -82.36091
## 3 0 spring 28.79622 -81.87803
## 4 7 winter 28.79602 -81.87777
## 5 7 winter 28.79602 -81.87777
## 6 7 winter 28.79602 -81.87777
## field_date_collected notes site sex_b
## 1 23rd & 8th -1
## 2 23rd & 8th -1
## 3 05.02.2013 date estimated; GPS estimated Mount & 8th -1
## 4 Mount & 8th 1
## 5 Mount & 8th 1
## 6 Mount & 8th 1
## pophost_b date datetime dates month_of_year wing2thorax
## 1 1 April/2014 Apr 2014 2014-04-01 4 2.271429
## 2 1 April/2014 Apr 2014 2014-04-01 4 NA
## 3 1 April/2013 Apr 2013 2013-04-01 4 2.334601
## 4 1 December/2013.6 Dec 2013 2013-12-01 12 2.290237
## 5 1 December/2013.6 Dec 2013 2013-12-01 12 2.256579
## 6 1 December/2013.6 Dec 2013 2013-12-01 12 2.250000
## wing_morph_b
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
par(mfrow=c(3,1))
hist(raw_data$wing[raw_data$w_morph=="L"]/raw_data$thorax[raw_data$w_morph=="L"],
main="Histogram of wing length/thorax length for long winged SBB",
xlab="wing length/thorax length",
breaks=seq(0.5, 3.8, by=0.05))
hist(raw_data_missing$wing/raw_data_missing$thorax,
main="Histogram of wing length/thorax length for SBB w/o recorded wing morph",
xlab="wing length/thorax length",
breaks=seq(0.5, 3.8, by=0.05))
hist(raw_data$wing[raw_data$w_morph=="S"]/raw_data$thorax[raw_data$w_morph=="S"],
main="Histogram of wing length/thorax length for short winged SBB",
xlab="wing length/thorax length",
breaks=seq(0.5,3.8,by=0.05))
Grouping the data for group counts.
group = raw_data %>% group_by(sex,
datetime) %>%
summarize(count = n())
## `summarise()` has grouped output by 'sex'. You can override using the `.groups` argument.
Checking over bug numbers per season.
#raw_data[raw_data$year =="2020",] #476 survived shipment, so I"m guessing that about 103 bugs did not survive shipment but were still measured for morphology?
#raw_data[raw_data$year =="2019" & raw_data$month=="October",] #similar here. 372 bugs measured but 207 survived. So that means that 165 bugs did not survive and were still measured for morph.
Plotting the histograms.
data = raw_data
data$population[data$population=="Ft.Myers"]<-"Ft. Myers"
data$population[data$population=="Key_Largo"]<-"Key Largo"
data$population[data$population=="Lake_Placid"]<-"Lake Placid"
data$population[data$population=="Lake_Wales"]<-"Lake Wales"
data$population[data$population=="North_Key_Largo"]<-"North Key Largo"
data$population[data$population=="Plantation_Key"]<-"Plantation Key"
data$pophost[data$pophost=="C.corindum"]<-"C. corindum"
data$pophost[data$pophost=="K.elegans"]<-"K. elegans"
data = data[order(-data$lat),]
#lat_order = unique(data$population) # should work but lat and long not right so need to do it manually:
lat_order = c("Gainesville", "Leesburg", "Lake Wales", "Lake Placid", "Ft. Myers", "Homestead", "North Key Largo", "Key Largo", "Plantation Key")
data$population = as.factor(data$population)
data$population = factor(data$population,levels=lat_order)
I have not removed the bugs with torn wings yet.
What determines wing morph using all the data? Does sex? host plant? month of the year?
m = glm(wing_morph_b ~ months_since_start, data=raw_data, family="binomial")
tidy_regression(m, is_color=FALSE)
## glm wing_morph_b ~ months_since_start binomial raw_data
## AIC: 3585.838
## (Intercept) coeff: 1.4966103 Pr(>|t|): 1.462966e-71 *
## months_since_start coeff: -0.0034902 Pr(>|t|): 0.013932 *
m = glm(wing_morph_b ~ month_of_year, data=raw_data, family="binomial")
tidy_regression(m, is_color=FALSE)
## glm wing_morph_b ~ month_of_year binomial raw_data
## AIC: 3548.031
## (Intercept) coeff: 0.8065427 Pr(>|t|): 8.994927e-21 *
## month_of_year coeff: 0.0803754 Pr(>|t|): 7.132571e-11 *
nrow(data)
## [1] 3532
data<-data.frame(R=raw_data$wing_morph_b,
A=raw_data$sex_b,
B=raw_data$pophost_b,
C=(raw_data$month_of_year),
D=raw_data$months_since_start)
model_script = paste0(source_path,"generic models-binomial glm 3-FF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## AICs 3185.995 3186.871 3187.155 3187.895 3187.898 3188.452 3189.02
## models 15 13 11 7 17 16 14
## probs 0.2635026 0.1700435 0.1475361 0.1019236 0.1017702 0.07714955 0.05806098
##
## m15 glm(formula = R ~ A * B + B * C, family = binomial, data = data)
## m13 glm(formula = R ~ B * C + A, family = binomial, data = data)
## m11 glm(formula = R ~ A * B + C, family = binomial, data = data)
## m7 glm(formula = R ~ A + B + C, family = binomial, data = data)
## m17 glm(formula = R ~ A * B + A * C + B * C, family = binomial, data = data)
## m16 glm(formula = R ~ A * C + B * C, family = binomial, data = data)
## m14 glm(formula = R ~ A * B + A * C, family = binomial, data = data)
anova(m13, m15, test="Chisq") # Adding A*B marginally improves fit
anova(m7, m13, test="Chisq") # Adding B*C marginally improves fit
anova(m5, m7, test="Chisq")
anova(m6, m7, test="Chisq")
anova(m4, m7, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: R ~ B * C + A
## Model 2: R ~ A * B + B * C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 3480 3176.9
## 2 3479 3174.0 1 2.876 0.08991 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Deviance Table
##
## Model 1: R ~ A + B + C
## Model 2: R ~ B * C + A
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 3481 3179.9
## 2 3480 3176.9 1 3.0237 0.08206 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Deviance Table
##
## Model 1: R ~ A + C
## Model 2: R ~ A + B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 3482 3541.3
## 2 3481 3179.9 1 361.36 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Deviance Table
##
## Model 1: R ~ B + C
## Model 2: R ~ A + B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 3482 3186.3
## 2 3481 3179.9 1 6.4204 0.01128 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Deviance Table
##
## Model 1: R ~ A + B
## Model 2: R ~ A + B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 3482 3188.9
## 2 3481 3179.9 1 8.9915 0.002712 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m = glm(wing_morph_b ~ sex_b + pophost_b + month_of_year, data=raw_data, family="binomial")
tidy_regression(m, is_color=FALSE) # m7
## glm wing_morph_b ~ sex_b + pophost_b + month_of_year binomial raw_data
## AIC: 3187.895
## (Intercept) coeff: 1.1984718 Pr(>|t|): 7.734972e-36 *
## sex_b coeff: -0.1122176 Pr(>|t|): 0.01134836 *
## pophost_b coeff: 0.8607509 Pr(>|t|): 1.162125e-69 *
## month_of_year coeff: 0.0389051 Pr(>|t|): 0.00284751 *
summary(m)
##
## Call:
## glm(formula = wing_morph_b ~ sex_b + pophost_b + month_of_year,
## family = "binomial", data = raw_data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3270 0.3716 0.4315 0.8476 1.0516
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.19847 0.09590 12.497 < 2e-16 ***
## sex_b -0.11222 0.04432 -2.532 0.01135 *
## pophost_b 0.86075 0.04879 17.642 < 2e-16 ***
## month_of_year 0.03891 0.01304 2.984 0.00285 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3588.0 on 3484 degrees of freedom
## Residual deviance: 3179.9 on 3481 degrees of freedom
## (47 observations deleted due to missingness)
## AIC: 3187.9
##
## Number of Fisher Scoring iterations: 5
Top model is m7 * Males are more likely to be long-winged. * K.elegans more likely to be long-winged. * (+) effect later in the season such that long wing morphs become increasingly likely later in the year.
temp = raw_data %>%
filter(!is.na(wing_morph_b))
check_spatial_dependencies(m, temp, temp$long, temp$lat, zone = 16, cutoff=14000, is_glm=TRUE)
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in Proj4
## definition
## Warning in proj4string(res): CRS object has comment, which is lost in output
A variogram is used to display the variability between data points as a function of distance. Here, from distances 0 to 4000 m, the variability between points starts high and then decreases (the points become more similar) until it levels out because at some point the distance between data points will be great enough that the points are no longer considered to be related to each other. The variability will flatten out into a “sill”.
No spatial dependencies for wing morph.
model_script = paste0(source_path,"generic models-gaussian glm 4-FF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4]
## AICs 3199.444 3200.118 3200.972 3201.607
## models 110 98 112 107
## probs 0.3416081 0.2438092 0.1591071 0.1158448
##
## m110 glm(formula = R ~ A * B + A * D + B * C + B * D + C * D, family = gaussian,
## data = data)
## m98 glm(formula = R ~ A * B + A * D + B * C + C * D, family = gaussian,
## data = data)
## m112 glm(formula = R ~ A * B + A * C + A * D + B * C + B * D + C *
## D, family = gaussian, data = data)
## m107 glm(formula = R ~ A * B + A * C + A * D + B * C + C * D, family = gaussian,
## data = data)
anova(m98, m110, test="Chisq") # adding B*D does not improve fit
anova(m84, m98, test="Chisq") # adding A*B improves fit
anova(m63, m84, test="Chisq") # Adding C*D improves fit
anova(m51, m63, test="Chisq") # Adding B improves fit
## Analysis of Deviance Table
##
## Model 1: R ~ A * B + A * D + B * C + C * D
## Model 2: R ~ A * B + A * D + B * C + B * D + C * D
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 3476 508.19
## 2 3475 507.80 1 0.38986 0.1024
## Analysis of Deviance Table
##
## Model 1: R ~ A * D + B * C + C * D
## Model 2: R ~ A * B + A * D + B * C + C * D
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 3477 509.64
## 2 3476 508.19 1 1.4483 0.001647 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Deviance Table
##
## Model 1: R ~ A * D + C * D + B
## Model 2: R ~ A * D + B * C + C * D
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 3478 511.66
## 2 3477 509.64 1 2.0211 0.0002046 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Analysis of Deviance Table
##
## Model 1: R ~ A * D + C * D
## Model 2: R ~ A * D + C * D + B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 3479 569.30
## 2 3478 511.66 1 57.636 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m = glm(wing_morph_b ~ sex_b * pophost_b + sex_b * months_since_start + pophost_b * month_of_year + month_of_year * months_since_start, data=raw_data, family="binomial") # m98 top model
tidy_regression(m, is_color=FALSE)
## glm wing_morph_b ~ sex_b * pophost_b + sex_b * months_since_start + pophost_b * month_of_year + month_of_year * months_since_start binomial raw_data
## AIC: 3173.032
## (Intercept) coeff: 0.7420895 Pr(>|t|): 5.207138e-05 *
## sex_b coeff: -0.2706994 Pr(>|t|): 0.002618832 *
## pophost_b coeff: 1.1107917 Pr(>|t|): 9.999362e-23 *
## months_since_start coeff: 0.0106835 Pr(>|t|): 0.0002869606 *
## month_of_year coeff: 0.1004066 Pr(>|t|): 8.030013e-05 *
## sex_b:pophost_b coeff: 0.09893 Pr(>|t|): 0.04409693 *
## sex_b:months_since_start coeff: 0.003887 Pr(>|t|): 0.01091935 *
## pophost_b:month_of_year coeff: -0.0371429 Pr(>|t|): 0.01294977 *
## months_since_start:month_of_year coeff: -0.0014746 Pr(>|t|): 0.001144355 *
Males are more likely to be long-winged.
K.elegans more likely to be long-winged.
(+) effect of getting later in the season (month_of_year), such that long wing morphs become increasingly likely later in the year.
(+) effect of months_since_start, such that long wing morphs become increasingly likely across the decade.
(+) effect of sex*host where if F and from K.elegans then more likely to be long-winged
(+) effect of sex*months_since_start where if F and later in the decade then more likely to be long-winged
(-) effect of pophost*month_of_year where if from C.corindum and later in the season, then more likely long wing morphs will be present
weak (-) effect of month_of_year*months_since_start where the later in the year and later in the decade, the more likely short wing morphs will be present.
# m = glm(wing_morph_b ~ sex_b*months_since_start + month_of_year*months_since_start + pophost_b*month_of_year, data=raw_data, family="binomial") # m84 was the top model
temp = raw_data %>%
filter(!is.na(wing_morph_b)) # filter out NA's that glm did automatically
check_spatial_dependencies(m, temp, temp$long, temp$lat, zone = 16, cutoff=14000, is_glm=TRUE)
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in Proj4
## definition
## Warning in proj4string(res): CRS object has comment, which is lost in output
Model validation looks fine. Nothing to worry about.
SE = function(x){sd(x)/sqrt(length(x))}
wmorph_summaryt<-aggregate(wing_morph_b~sex_b*pophost_b*month_of_year*months_since_start, data=raw_data, FUN=mean)
wmorph_summaryt$sd<-aggregate(wing_morph_b~sex_b*pophost_b*month_of_year*months_since_start, data=raw_data,
FUN=sd)$wing_morph_b
wmorph_summaryt$se<-aggregate(wing_morph_b~sex_b*pophost_b*month_of_year*months_since_start, data=raw_data,
FUN=SE)$wing_morph_b
data = wmorph_summaryt
data<-data.frame(R=data$sd,
A=data$sex_b,
B=data$pophost_b,
C=(data$month_of_year),
D=data$months_since_start)
model_script = paste0(source_path,"generic models-gaussian glm 3-FF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs -91.69472 -90.25875 -89.97386 -88.65427 -88.54186 -88.5364
## models 2 4 6 10 7 8
## probs 0.3392973 0.1654864 0.1435158 0.07419158 0.07013674 0.06994547
##
## m2 glm(formula = R ~ B, family = gaussian, data = data)
## m4 glm(formula = R ~ A + B, family = gaussian, data = data)
## m6 glm(formula = R ~ B + C, family = gaussian, data = data)
## m10 glm(formula = R ~ B * C, family = gaussian, data = data)
## m7 glm(formula = R ~ A + B + C, family = gaussian, data = data)
## m8 glm(formula = R ~ A * B, family = gaussian, data = data)
anova(m2, m6, test="Chisq") # Adding C does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ B
## Model 2: R ~ B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 38 0.20365
## 2 37 0.20223 1 0.0014162 0.6107
anova(m2, m4, test="Chisq") # Adding B does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ B
## Model 2: R ~ A + B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 38 0.20365
## 2 37 0.20080 1 0.0028514 0.4685
anova(m0, m2, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: R ~ 1
## Model 2: R ~ B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 39 0.61828
## 2 38 0.20365 1 0.41464 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m = glm(sd ~ pophost_b, data=wmorph_summaryt, family="gaussian")
tidy_regression(m, is_color=FALSE) # -1 = C.corindum
## glm sd ~ pophost_b gaussian wmorph_summaryt
## AIC: -91.69472
## (Intercept) coeff: 0.3531599 Pr(>|t|): 2.477074e-28 *
## pophost_b coeff: -0.101813 Pr(>|t|): 1.063916e-10 *
only a (-) effect of pophost, where if from K.elegans then have less variation vs. if from C.cordinum have more variation.
plot(m$fitted.values, m$residuals)
#data_long = remove_torn_wings(data_long)
data_long$compare_dates <- -1
data_long$compare_dates[data_long$months_since_start==81] <- 1
compare_dates_model <- glm(wing2body~compare_dates + pophost_b*sex_b, data=data_long)
tidy_regression(compare_dates_model, is_color=FALSE)
## glm wing2body ~ compare_dates + pophost_b * sex_b data_long
## AIC: -10712.03
## (Intercept) coeff: 0.726583 Pr(>|t|): 0 *
## compare_dates coeff: -0.0040607 Pr(>|t|): 2.930608e-15 *
## pophost_b coeff: 0.0042202 Pr(>|t|): 5.753746e-22 *
## sex_b coeff: -0.0018677 Pr(>|t|): 1.213464e-05 *
## pophost_b:sex_b coeff: 0.0014735 Pr(>|t|): 0.0005211045 *
Yes, bugs from this collection date have detectably smaller wing2body ratios on average than other collection dates, even when controlling for sex and host plant.
What effects wing2body ratio? sex? host plant? month of the year? months since start?
data_long$wing2body_c = (data_long$wing2body-mean(data_long$wing2body, na.rm=TRUE))
data_long$month_of_year_c = (data_long$month_of_year-mean(data_long$month_of_year, na.rm=TRUE))
data_long$months_since_start_c = (data_long$months_since_start-mean(data_long$months_since_start, na.rm=TRUE))
data<-data.frame(R=data_long$wing2body_c, # centered
A=data_long$sex_b, # sex
B=data_long$pophost_b, # host
C=data_long$month_of_year_c,
D=data_long$months_since_start_c)
model_script = paste0(source_path,"generic models-gaussian glm 3-FF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4]
## AICs -10694.74 -10694.11 -10692.75 -10692.12
## models 11 14 15 17
## probs 0.4217301 0.3074314 0.1558672 0.1138381
##
## m11 glm(formula = R ~ A * B + C, family = gaussian, data = data)
## m14 glm(formula = R ~ A * B + A * C, family = gaussian, data = data)
## m15 glm(formula = R ~ A * B + B * C, family = gaussian, data = data)
## m17 glm(formula = R ~ A * B + A * C + B * C, family = gaussian, data = data)
anova(m15, m17, test="Chisq") # Adding A*C does not improve fit
anova(m11, m15, test="Chisq") # Adding B*C does not improve fit
anova(m11, m14, test="Chisq") # Adding A*C does not improve fit
anova(m8, m11, test="Chisq") # Adding C does improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A * B + B * C
## Model 2: R ~ A * B + A * C + B * C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 2090 0.74210
## 2 2089 0.74161 1 0.00048544 0.2423
## Analysis of Deviance Table
##
## Model 1: R ~ A * B + C
## Model 2: R ~ A * B + B * C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 2091 0.7421
## 2 2090 0.7421 1 3.2849e-06 0.9234
## Analysis of Deviance Table
##
## Model 1: R ~ A * B + C
## Model 2: R ~ A * B + A * C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 2091 0.74210
## 2 2090 0.74162 1 0.00048411 0.2428
## Analysis of Deviance Table
##
## Model 1: R ~ A * B
## Model 2: R ~ A * B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 2092 0.75827
## 2 2091 0.74210 1 0.016172 1.474e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m = glm(wing2body_c ~ sex_b*pophost_b + month_of_year_c, data=data_long, family=gaussian)
tidy_regression(m, is_color=FALSE) # m11
## glm wing2body_c ~ sex_b * pophost_b + month_of_year_c gaussian data_long
## AIC: -10694.74
## (Intercept) coeff: -0.001235 Pr(>|t|): 0.003762096 *
## sex_b coeff: -0.0016493 Pr(>|t|): 0.000111303 *
## pophost_b coeff: 0.0045941 Pr(>|t|): 3.84725e-26 *
## month_of_year_c coeff: 0.0008385 Pr(>|t|): 1.903239e-11 *
## sex_b:pophost_b coeff: 0.001668 Pr(>|t|): 9.33909e-05 *
summary(m)
##
## Call:
## glm(formula = wing2body_c ~ sex_b * pophost_b + month_of_year_c,
## family = gaussian, data = data_long)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.069822 -0.011139 -0.000314 0.011127 0.115009
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0012350 0.0004258 -2.901 0.003762 **
## sex_b -0.0016493 0.0004260 -3.872 0.000111 ***
## pophost_b 0.0045941 0.0004285 10.720 < 2e-16 ***
## month_of_year_c 0.0008385 0.0001242 6.750 1.90e-11 ***
## sex_b:pophost_b 0.0016680 0.0004261 3.915 9.34e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.0003549032)
##
## Null deviance: 0.80932 on 2095 degrees of freedom
## Residual deviance: 0.74210 on 2091 degrees of freedom
## AIC: -10695
##
## Number of Fisher Scoring iterations: 2
nrow(data_long)
## [1] 2096
temp = data_long %>%
filter(!is.na(wing2body_c))
check_spatial_dependencies(m, temp, temp$long, temp$lat, zone = 16, cutoff=14000, is_glm=TRUE)
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on WGS84 ellipsoid in Proj4
## definition
## Warning in proj4string(res): CRS object has comment, which is lost in output
There might be some spatial dependencies for long wing freq because seeing clusters of red and clusters of black.
Also the variogram shows distances 0 to 10,000 m where the variability between points starts high and then decreases (the points become more similar) until it levels out because at some point the distance between data points will be great enough that the points are no longer considered to be related to each other. The variability will flatten out into a “sill”.
Strange that it’s decreases over distance. But if you do 20,000 m then see it begins to increase again.
model_script = paste0(source_path,"generic models-gaussian glm 4-FF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4] [,5]
## AICs -10698.64 -10697.49 -10697.26 -10697.14 -10696.5
## models 88 99 92 97 58
## probs 0.1720245 0.09639838 0.08606771 0.08118068 0.05886596
##
## m88 glm(formula = R ~ A * B + A * D + B * D + C, family = gaussian,
## data = data)
## m99 glm(formula = R ~ A * B + A * D + B * D + C * D, family = gaussian,
## data = data)
## m92 glm(formula = R ~ A * B + A * C + A * D + B * D, family = gaussian,
## data = data)
## m97 glm(formula = R ~ A * B + A * D + B * C + B * D, family = gaussian,
## data = data)
## m58 glm(formula = R ~ A * B + B * D + C, family = gaussian, data = data)
anova(m22, m42, test="Chisq") # adding B*C does not improve fit
## Analysis of Deviance Table
##
## Model 1: R ~ A * B + C
## Model 2: R ~ A * B + B * C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 2091 0.7421
## 2 2090 0.7421 1 3.2849e-06 0.9234
anova(m16, m22, test="Chisq") # adding C does improve fit # same top model as before
## Analysis of Deviance Table
##
## Model 1: R ~ A * B
## Model 2: R ~ A * B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 2092 0.75827
## 2 2091 0.74210 1 0.016172 1.474e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Same best fit model (no months_since_start effect). But can plot the positive effect of month of year on wing2body ratio. It looks cyclical by pophost and more linear by sex:
wing2body_summary<-aggregate(wing2body~pophost*month_of_year, data=data_long, FUN=mean)
plot(wing2body~month_of_year, data=wing2body_summary, pch=19, col=c(1,2)[as.factor(pophost)])
wing2body_summary<-aggregate(wing2body~sex_b*month_of_year, data=data_long, FUN=mean) # black is M; Red is F
plot(wing2body~month_of_year, data=wing2body_summary, pch=19, col=c(1,2)[as.factor(sex_b)]) # black is C. ; Red is K.
These plots are plotted cleanly in the wing_plots.Rmd script.
SE = function(x){sd(x)/sqrt(length(x))}
w2b_summaryt<-aggregate(wing2body~sex_b*pophost_b*month_of_year*months_since_start, data=data_long, FUN=mean)
w2b_summaryt$sd<-aggregate(wing2body~sex_b*pophost_b*month_of_year*months_since_start, data=data_long,
FUN=sd)$wing2body
w2b_summaryt$se<-aggregate(wing2body~sex_b*pophost_b*month_of_year*months_since_start, data=data_long,
FUN=SE)$wing2body
data = w2b_summaryt
data<-data.frame(R=data$sd,
A=data$sex_b,
B=data$pophost_b,
C=(data$month_of_year),
D=data$months_since_start)
model_script = paste0(source_path,"generic models-gaussian glm 3-FF.R")
model_comparisonsAIC(model_script)
## [,1] [,2] [,3] [,4] [,5] [,6]
## AICs -282.1703 -282.1217 -281.8742 -280.1703 -280.1217 -279.8742
## models 6 10 2 7 13 4
## probs 0.1912932 0.1866967 0.1649671 0.0703737 0.06868278 0.06068871
##
## m6 glm(formula = R ~ B + C, family = gaussian, data = data)
## m10 glm(formula = R ~ B * C, family = gaussian, data = data)
## m2 glm(formula = R ~ B, family = gaussian, data = data)
## m7 glm(formula = R ~ A + B + C, family = gaussian, data = data)
## m13 glm(formula = R ~ B * C + A, family = gaussian, data = data)
## m4 glm(formula = R ~ A + B, family = gaussian, data = data)
anova(m2, m6, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: R ~ B
## Model 2: R ~ B + C
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 34 0.00070956
## 2 33 0.00066572 1 4.3844e-05 0.1404
anova(m0, m2, test="Chisq") # Adding B marginally improves fit
## Analysis of Deviance Table
##
## Model 1: R ~ 1
## Model 2: R ~ B
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 35 0.00081657
## 2 34 0.00070956 1 0.00010701 0.02355 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m = glm(sd ~ pophost_b, data=w2b_summaryt, family=gaussian)
tidy_regression(m, is_color=FALSE)
## glm sd ~ pophost_b gaussian w2b_summaryt
## AIC: -281.8742
## (Intercept) coeff: 0.0167934 Pr(>|t|): 1.007012e-21 *
## pophost_b coeff: 0.0017241 Pr(>|t|): 0.03004314 *
Nothing seems to effect the w2b variance.
Plot the histograms between the sexes and conduct var tests.
time_var_tests = function(d, print_test=FALSE) {
months = sort(unique(d$month_of_year))
month_labs <- c("Feb", "Apr", "May", "Aug", "Sept", "Oct", "Dec")
table = matrix(nrow=length(months), ncol=9)
i = 0
for (m in months) {
i = i + 1
data = d[d$month_of_year==m, ]
VAR = var.test(wing2body ~ sex, data = data) # F test to compare the variances of two samples from normal pops
TTEST= t.test(wing2body ~ sex, data=data) # t.test to find significant difference between the means of two groups
AOV = aov(wing2body ~ sex, data=data) # used to analyze the differences among means.
p = TukeyHSD(AOV)$sex[,"p adj"]
diff = TukeyHSD(AOV)$sex[,"diff"]
if (print_test) {
print(month_labs[i])
print("t.test")
print(TTEST)
print("anova")
print(summary(AOV))
print(TukeyHSD(AOV))
cat("-------------------------------------------------")
}
# plot histograms
h = data %>%
ggplot( aes(x=wing2body, fill=sex)) +
geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
scale_fill_manual(values=c("#69b3a2", "#404080")) +
labs(fill="") + labs(title=month_labs[i])
print(h)
# populate matrix
table[i,1] = month_labs[i]
table[i,2] = "M-F"
table[i,3] = round(diff,4)
table[i,4] = round(p,5)
table[i,5] = "M=-1 | F=1"
table[i,6] = round(TTEST$statistic,4)
table[i,7] = round(TTEST$p.value,5)
table[i,8] = round(VAR$statistic,2)
table[i,9] = round(VAR$p.value, 3)
}
colnames(table) = c("month","sex", "Tukey-diff", "p-val", "sex", "ttest", "p-val", "F-test", "p-val")
return(table)
}
summary_table = time_var_tests(data_long)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary_table
## month sex Tukey-diff p-val sex ttest p-val F-test
## [1,] "Feb" "M-F" "0.0069" "1e-05" "M=-1 | F=1" "-4.513" "1e-05" "1.01"
## [2,] "Apr" "M-F" "0.0057" "0.03909" "M=-1 | F=1" "-2.0508" "0.04159" "1.24"
## [3,] "May" "M-F" "-0.0056" "0.04002" "M=-1 | F=1" "2.1789" "0.02992" "1.69"
## [4,] "Aug" "M-F" "0.0029" "0.07841" "M=-1 | F=1" "-1.7611" "0.0792" "1.05"
## [5,] "Sept" "M-F" "0.0024" "0.35468" "M=-1 | F=1" "-0.9363" "0.35087" "1.23"
## [6,] "Oct" "M-F" "0.0047" "0.0083" "M=-1 | F=1" "-2.6536" "0.00836" "1.04"
## [7,] "Dec" "M-F" "0.005" "0.0342" "M=-1 | F=1" "-2.1903" "0.02964" "0.7"
## p-val
## [1,] "0.952"
## [2,] "0.261"
## [3,] "0"
## [4,] "0.772"
## [5,] "0.408"
## [6,] "0.8"
## [7,] "0.082"
y = summary_table[,"F-test"]
xlab = summary_table[, "month"]
x = sort(unique(d$month_of_year))
plot(x,y, ylab="F-test value", xlab="Month of Year", xaxt = "n", main="wing-to-body ~ sex")
axis(1, at=seq(min(x),max(x),2), labels=xlab[-5])
abline(h=1, col=2) # non-linear relationship where at the beginning of the year the variance between sexes is similar but then in spring it deviates a lot. In the summer the variance is similar and then towards the winter the variance deviates.
time_var_tests = function(d, print_test=FALSE) {
months = sort(unique(d$month_of_year))
month_labs <- c("Feb", "Apr", "May", "Aug", "Sept", "Oct", "Dec")
table = matrix(nrow=length(months), ncol=9)
i = 0
for (m in months) {
i = i + 1
data = d[d$month_of_year==m, ]
VAR = var.test(wing2body ~ pophost, data = data) # F test to compare the variances of two samples from normal pops
TTEST= t.test(wing2body ~ pophost, data=data) # t.test to find significant difference between the means of two groups
AOV = aov(wing2body ~ pophost, data=data) # used to analyze the differences among means.
p = TukeyHSD(AOV)$pophost[,"p adj"]
diff = TukeyHSD(AOV)$pophost[,"diff"]
if (print_test) {
print(month_labs[i])
print("t.test")
print(TTEST)
print("anova")
print(summary(AOV))
print(TukeyHSD(AOV))
cat("-------------------------------------------------")
}
# plot histograms
h = data %>%
ggplot( aes(x=wing2body, fill=pophost)) +
geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
scale_fill_manual(values=c("#69b3a2", "#404080")) +
labs(fill="") + labs(title=month_labs[i])
print(h)
# populate matrix
table[i,1] = month_labs[i]
table[i,2] = "GRT-BV"
table[i,3] = round(diff,4)
table[i,4] = round(p,5)
table[i,5] = "GRT=1 | BV=-1"
table[i,6] = round(TTEST$statistic,4)
table[i,7] = round(TTEST$p.value,5)
table[i,8] = round(VAR$statistic,2)
table[i,9] = round(VAR$p.value, 3)
}
colnames(table) = c("month","sex", "Tukey-diff", "p-val", "sex", "ttest", "p-val", "F-test", "p-val")
return(table)
}
summary_table = time_var_tests(data_long)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary_table
## month sex Tukey-diff p-val sex ttest p-val
## [1,] "Feb" "GRT-BV" "0.0077" "0" "GRT=1 | BV=-1" "-5.3374" "0"
## [2,] "Apr" "GRT-BV" "0.003" "0.36637" "GRT=1 | BV=-1" "-0.9536" "0.34306"
## [3,] "May" "GRT-BV" "0.0108" "6e-05" "GRT=1 | BV=-1" "-4.5295" "1e-05"
## [4,] "Aug" "GRT-BV" "0.0065" "0.0013" "GRT=1 | BV=-1" "-3.372" "0.00104"
## [5,] "Sept" "GRT-BV" "0.0082" "0.0047" "GRT=1 | BV=-1" "-2.6132" "0.01198"
## [6,] "Oct" "GRT-BV" "0.0087" "0" "GRT=1 | BV=-1" "-5.0098" "0"
## [7,] "Dec" "GRT-BV" "0.0107" "1e-05" "GRT=1 | BV=-1" "-4.7534" "0"
## F-test p-val
## [1,] "1.12" "0.407"
## [2,] "0.83" "0.457"
## [3,] "0.22" "0"
## [4,] "0.88" "0.54"
## [5,] "1.47" "0.156"
## [6,] "1.12" "0.456"
## [7,] "0.8" "0.298"
y = summary_table[,"F-test"]
xlab = summary_table[, "month"]
x = sort(unique(d$month_of_year))
plot(x,y, ylab="F-test value", xlab="Month of Year", xaxt = "n", main="wing-to-body ~ pophost")
axis(1, at=seq(min(x),max(x),2), labels=xlab[-5])
abline(h=1, col=2) # non-linear relationship where at the beginning of the year the variance between hostplants is similar but then in spring it deviates a lot. In the summer and winter the variance is similar. (GRT/BV var = F-test value where in Sept and Feb only months where GRT is higher and the rest is BV)
temp = data_long %>%
filter(!is.na(months_since_start), !is.na(sex), !is.na(wing2body))
time_var_tests = function(d, print_test=FALSE) {
months = unique(d$months_since_start)
#month_labs <- c("Feb", "Apr", "May", "Aug", "Sept", "Oct", "Dec")
table = matrix(nrow=length(months), ncol=9)
i = 0
for (m in months) {
i = i + 1
data = d[d$months_since_start==m, ]
VAR = var.test(wing2body ~ sex, data = data) # F test to compare the variances of two samples from normal pops
TTEST= t.test(wing2body ~ sex, data=data) # t.test to find significant difference between the means of two groups
AOV = aov(wing2body ~ sex, data=data) # used to analyze the differences among means.
p = TukeyHSD(AOV)$sex[,"p adj"]
diff = TukeyHSD(AOV)$sex[,"diff"]
if (print_test) {
print(m)
print("t.test")
print(TTEST)
print("anova")
print(summary(AOV))
print(TukeyHSD(AOV))
cat("-------------------------------------------------")
}
# plot histograms
h = data %>%
ggplot( aes(x=wing2body, fill=sex)) +
geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
scale_fill_manual(values=c("#69b3a2", "#404080")) +
labs(fill="") + labs(title=m)
print(h)
# populate matrix
table[i,1] = m
table[i,2] = "M-F"
table[i,3] = round(diff,4)
table[i,4] = round(p,5)
table[i,5] = "M=-1 | F=1"
table[i,6] = round(TTEST$statistic,4)
table[i,7] = round(TTEST$p.value,5)
table[i,8] = round(VAR$statistic,2)
table[i,9] = round(VAR$p.value, 3)
}
colnames(table) = c("month","sex", "Tukey-diff", "p-val", "sex", "ttest", "p-val", "F-test", "p-val")
return(table)
}
summary_table = time_var_tests(temp)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary_table
## month sex Tukey-diff p-val sex ttest p-val F-test
## [1,] "72" "M-F" "-0.0056" "0.04002" "M=-1 | F=1" "2.1789" "0.02992" "1.69"
## [2,] "51" "M-F" "0.0029" "0.07841" "M=-1 | F=1" "-1.7611" "0.0792" "1.05"
## [3,] "64" "M-F" "0.0024" "0.35468" "M=-1 | F=1" "-0.9363" "0.35087" "1.23"
## [4,] "43" "M-F" "0.005" "0.0342" "M=-1 | F=1" "-2.1903" "0.02964" "0.7"
## [5,] "81" "M-F" "0.0069" "1e-05" "M=-1 | F=1" "-4.513" "1e-05" "1.01"
## [6,] "77" "M-F" "0.0047" "0.0083" "M=-1 | F=1" "-2.6536" "0.00836" "1.04"
## [7,] "23" "M-F" "0.0043" "0.32845" "M=-1 | F=1" "-0.9809" "0.32901" "1.25"
## [8,] "0" "M-F" "-0.0011" "0.84031" "M=-1 | F=1" "0.1929" "0.84844" "1.22"
## [9,] "11" "M-F" "0.0137" "0.00601" "M=-1 | F=1" "-2.9604" "0.00471" "1.5"
## p-val
## [1,] "0"
## [2,] "0.772"
## [3,] "0.408"
## [4,] "0.082"
## [5,] "0.952"
## [6,] "0.8"
## [7,] "0.429"
## [8,] "0.57"
## [9,] "0.343"
y = summary_table[,"F-test"]
xlab = summary_table[, "month"]
x = sort(unique(temp$months_since_start))
plot(x,y, ylab="F-test value", xlab="Months Since Start", main="wing-to-body ~ pophost")
#axis(1, at=seq(min(x),max(x),2), labels=xlab[-5])
abline(h=1, col=2)
temp = data_long %>%
filter(!is.na(months_since_start), !is.na(pophost), !is.na(wing2body))
time_var_tests = function(d, print_test=FALSE) {
months = unique(d$months_since_start)
#month_labs <- c("Feb", "Apr", "May", "Aug", "Sept", "Oct", "Dec")
table = matrix(nrow=length(months), ncol=9)
i = 0
for (m in months) {
i = i + 1
data = d[d$months_since_start==m, ]
VAR = var.test(wing2body ~ pophost, data = data) # F test to compare the variances of two samples from normal pops
TTEST= t.test(wing2body ~ pophost, data=data) # t.test to find significant difference between the means of two groups
AOV = aov(wing2body ~ pophost, data=data) # used to analyze the differences among means.
p = TukeyHSD(AOV)$pophost[,"p adj"]
diff = TukeyHSD(AOV)$pophost[,"diff"]
if (print_test) {
print(m)
print("t.test")
print(TTEST)
print("anova")
print(summary(AOV))
print(TukeyHSD(AOV))
cat("-------------------------------------------------")
}
# plot histograms
h = data %>%
ggplot( aes(x=wing2body, fill=pophost)) +
geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
scale_fill_manual(values=c("#69b3a2", "#404080")) +
labs(fill="") + labs(title=m)
print(h)
# populate matrix
table[i,1] = m
table[i,2] = "GRT-BV"
table[i,3] = round(diff,4)
table[i,4] = round(p,5)
table[i,5] = "GRT=1 | BV=-1"
table[i,6] = round(TTEST$statistic,4)
table[i,7] = round(TTEST$p.value,5)
table[i,8] = round(VAR$statistic,2)
table[i,9] = round(VAR$p.value, 3)
}
colnames(table) = c("month","sex", "Tukey-diff", "p-val", "sex", "ttest", "p-val", "F-test", "p-val")
return(table)
}
summary_table = time_var_tests(temp)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary_table
## month sex Tukey-diff p-val sex ttest p-val
## [1,] "72" "GRT-BV" "0.0108" "6e-05" "GRT=1 | BV=-1" "-4.5295" "1e-05"
## [2,] "51" "GRT-BV" "0.0065" "0.0013" "GRT=1 | BV=-1" "-3.372" "0.00104"
## [3,] "64" "GRT-BV" "0.0082" "0.0047" "GRT=1 | BV=-1" "-2.6132" "0.01198"
## [4,] "43" "GRT-BV" "0.0107" "1e-05" "GRT=1 | BV=-1" "-4.7534" "0"
## [5,] "81" "GRT-BV" "0.0077" "0" "GRT=1 | BV=-1" "-5.3374" "0"
## [6,] "77" "GRT-BV" "0.0087" "0" "GRT=1 | BV=-1" "-5.0098" "0"
## [7,] "23" "GRT-BV" "0.0131" "0.02364" "GRT=1 | BV=-1" "-2.447" "0.02201"
## [8,] "0" "GRT-BV" "-0.0025" "0.62359" "GRT=1 | BV=-1" "0.5289" "0.59996"
## [9,] "11" "GRT-BV" "0" "0.9978" "GRT=1 | BV=-1" "-0.0035" "0.99727"
## F-test p-val
## [1,] "0.22" "0"
## [2,] "0.88" "0.54"
## [3,] "1.47" "0.156"
## [4,] "0.8" "0.298"
## [5,] "1.12" "0.407"
## [6,] "1.12" "0.456"
## [7,] "0.83" "0.702"
## [8,] "0.73" "0.471"
## [9,] "0.4" "0.089"
y = summary_table[,"F-test"]
xlab = summary_table[, "month"]
x = sort(unique(temp$months_since_start))
plot(x,y, ylab="F-test value", xlab="Months Since Start", main="wing-to-body ~ pophost")
#axis(1, at=seq(min(x),max(x),2), labels=xlab[-5])
abline(h=1, col=2)