Start by imputing pollen data and creating a new data frame with average pollen consumption on a per-colony basis
### Figure out average pollen consumption by treatment
pollen <- read_csv("pollen1.csv", col_types = cols(round = col_factor(levels = c("1",
"2")), treatment = col_factor(levels = c("1",
"2", "3", "4", "5")), replicate = col_factor(levels = c("1",
"2", "3", "4", "5", "6", "7", "9", "11",
"12")), start_date = col_date(format = "%m/%d/%Y"),
start_time = col_character(), end_date = col_date(format = "%m/%d/%Y"),
end_time = col_character()))
pollen$colony <- as.factor(pollen$colony)
pollen$pid <- as.factor(pollen$pid)
pollen$count <- as.factor(pollen$count)
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] -0.98780 1.56542
pollen <- subset(pollen, pollen$round != 1)
pollen
## # A tibble: 928 × 15
## round colony treatment replicate count start_date start_…¹ start…² end_date
## <fct> <fct> <fct> <fct> <fct> <date> <chr> <dbl> <date>
## 1 2 1.11R2 1 11 2 2022-08-22 10:30 1.18 2022-08-24
## 2 2 1.11R2 1 11 3 2022-08-24 12:00 1.15 2022-08-26
## 3 2 1.11R2 1 11 4 2022-08-26 10:30 1.09 2022-08-28
## 4 2 1.11R2 1 11 5 2022-08-28 1:00 1.26 2022-08-30
## 5 2 1.11R2 1 11 5 2022-08-30 11:30 1.14 2022-09-01
## 6 2 1.11R2 1 11 21 2022-09-01 10:30 1.07 2022-09-03
## 7 2 1.11R2 1 11 8 2022-09-03 12:20 0.844 2022-09-05
## 8 2 1.11R2 1 11 9 2022-09-05 12:40 1.30 2022-09-07
## 9 2 1.11R2 1 11 10 2022-09-07 10:30 1.22 2022-09-09
## 10 2 1.11R2 1 11 11 2022-09-09 11:30 1.26 2022-09-11
## # … with 918 more rows, 6 more variables: end_time <chr>, end_weight <dbl>,
## # whole_dif <chr>, difference <dbl>, pid <fct>, bees_alive <dbl>, and
## # abbreviated variable names ¹start_time, ²start_weight
# get rid of negative numbers
pollen$difference[pollen$difference < 0] <- NA
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] 0.002715 1.565420
pollen$whole_dif <- as.numeric(pollen$difference)
wd.1 <- lm(difference ~ treatment, data = pollen)
summary(wd.1)
##
## Call:
## lm(formula = difference ~ treatment, data = pollen)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4853 -0.2416 -0.1504 0.1576 1.0638
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.429541 0.024170 17.772 <2e-16 ***
## treatment2 0.072099 0.034886 2.067 0.0390 *
## treatment3 0.078078 0.034406 2.269 0.0235 *
## treatment4 0.058490 0.034988 1.672 0.0949 .
## treatment5 0.004982 0.035040 0.142 0.8870
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3375 on 915 degrees of freedom
## Multiple R-squared: 0.009927, Adjusted R-squared: 0.005599
## F-statistic: 2.294 on 4 and 915 DF, p-value: 0.05773
wd.emm <- emmeans(wd.1, "treatment")
summary(wd.emm)
## treatment emmean SE df lower.CL upper.CL
## 1 0.430 0.0242 915 0.382 0.477
## 2 0.502 0.0252 915 0.452 0.551
## 3 0.508 0.0245 915 0.460 0.556
## 4 0.488 0.0253 915 0.438 0.538
## 5 0.435 0.0254 915 0.385 0.484
##
## Confidence level used: 0.95
wd.summary <- pollen %>%
group_by(colony) %>%
summarize(whole.mean = mean(difference))
as.data.frame(wd.summary) # this is the data frame I will merge with subsequent data frames to contain average pollen consumption per colony as a new column
## colony whole.mean
## 1 1.11R2 0.2829509
## 2 1.12R2 0.1697964
## 3 1.1R2 0.5213458
## 4 1.2R2 0.3374200
## 5 1.3R2 0.4512891
## 6 1.4R2 0.6063016
## 7 1.5R2 0.7079516
## 8 1.7R2 0.7400381
## 9 1.9R2 0.2240081
## 10 2.11R2 0.4178270
## 11 2.12R2 0.4035568
## 12 2.1R2 0.6101589
## 13 2.2R2 0.5109234
## 14 2.3R2 0.3280036
## 15 2.4R2 0.3881394
## 16 2.5R2 0.7194915
## 17 2.7R2 0.5299685
## 18 2.9R2 0.5857152
## 19 3.11R2 0.4216410
## 20 3.12R2 0.3390993
## 21 3.1R2 0.3711948
## 22 3.2R2 0.3461010
## 23 3.3R2 0.8465806
## 24 3.4R2 0.4120433
## 25 3.5R2 0.8906211
## 26 3.7R2 0.6266680
## 27 3.9R2 0.4409511
## 28 4.11R2 0.3456011
## 29 4.12R2 0.2496295
## 30 4.1R2 0.7074755
## 31 4.2R2 0.3871742
## 32 4.3R2 0.5800074
## 33 4.4R2 0.8356247
## 34 4.5R2 0.2335967
## 35 4.7R2 0.6237400
## 36 4.9R2 0.5322950
## 37 5.11R2 0.4113960
## 38 5.12R2 0.2077741
## 39 5.1R2 0.3782286
## 40 5.2R2 0.4912468
## 41 5.3R2 0.2128778
## 42 5.4R2 0.4563045
## 43 5.5R2 0.7095479
## 44 5.7R2 0.6113189
## 45 5.9R2 0.4188290
drones <- read_csv("drones_rf.csv", col_types = cols(round = col_factor(levels = c("1","2")), treatment = col_factor(levels = c("1","2", "3", "4", "5")), notes = col_skip(), colony_start = col_skip(), colony_count = col_skip(), alive = col_skip(), emerge_date = col_skip()))
## Warning: The following named parsers don't match the column names: alive
drones <- subset(drones, drones$round != 1)
drones
## # A tibble: 422 × 16
## round treatment replicate colony id emerg…¹ wet_w…² dry_w…³ alive…⁴ radial
## <fct> <fct> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 2 1 4 1.4R2 1.4R… 33 1.16 0.0486 1 2.4522
## 2 2 3 3 3.3R2 3.3R… 31 1.17 0.0362 1 2.3211
## 3 2 3 3 3.3R2 3.3R… 31 1.14 0.0826 1 2.0436
## 4 2 3 3 3.3R2 3.3R… 31 1.20 0.0297 1 2.2313
## 5 2 4 3 4.3R2 4.3R… 33 1.17 0.0429 1 2.3051
## 6 2 4 3 4.3R2 4.3R… 33 1.15 0.0323 1 1.932
## 7 2 4 3 4.3R2 4.3R… 33 1.06 0.0369 1 2.1584
## 8 2 4 3 4.3R2 4.3R… 33 1.11 0.0449 1 2.2778
## 9 2 4 4 4.4R2 4.4R… 33 1.15 0.0435 1 2.3155
## 10 2 4 4 4.4R2 4.4R… 33 1.14 0.0515 1 2.4122
## # … with 412 more rows, 6 more variables: abdomen_dry <dbl>,
## # order_on_sheet <dbl>, abdomen_post_ethyl <dbl>, fat_content <dbl>,
## # relative_fat <dbl>, workers_alive <dbl>, and abbreviated variable names
## # ¹emerge_time, ²wet_weight, ³dry_weight, ⁴`alive?`
drones$order_on_sheet <- as.factor(drones$order_on_sheet)
drones$replicate <- as.factor(drones$replicate)
drones$colony <- as.factor(drones$colony)
drones$id <- as.factor(drones$id)
drones$relative_fat <- as.double(drones$relative_fat)
drones$radial <- as.double(drones$radial)
drones$`alive?` <- as.double(drones$`alive?`)
## Warning: NAs introduced by coercion
drf.pollen <- merge(wd.summary, drones, by.x = "colony") # this is a new data frame with average pollen consumption data per colony
drf.pollen$alive <- as.logical(drf.pollen$`alive?`)
drone.sum <- drf.pollen %>%
group_by(colony) %>%
summarise( count.drone = length(id))
drone.sum
## # A tibble: 40 × 2
## colony count.drone
## <fct> <int>
## 1 1.11R2 4
## 2 1.1R2 6
## 3 1.2R2 12
## 4 1.3R2 11
## 5 1.4R2 16
## 6 1.5R2 22
## 7 1.7R2 12
## 8 2.11R2 9
## 9 2.12R2 5
## 10 2.1R2 11
## # … with 30 more rows
drone.sum <- as.data.frame(drone.sum)
qro <- read_csv("qro.no1.csv")
## Rows: 45 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): colony, qro
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
qro <- as.data.frame(qro)
qro$colony <- as.factor(qro$colony)
qro$qro <- as.factor(qro$qro)
drf.pollen <- merge(drf.pollen, qro, by.x = "colony")
plot(drf.pollen$treatment, drf.pollen$emerge_time)
hist(drf.pollen$emerge_time)
emerge.gn <- glm(emerge_time ~ treatment + whole.mean + workers_alive + qro, data = drf.pollen, family = "poisson")
summary(emerge.gn) # underdispersed?
##
## Call:
## glm(formula = emerge_time ~ treatment + whole.mean + workers_alive +
## qro, family = "poisson", data = drf.pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.26941 -0.44081 0.00457 0.34338 1.62521
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.730413 0.056809 65.665 < 2e-16 ***
## treatment2 -0.004578 0.025687 -0.178 0.858545
## treatment3 0.016647 0.027150 0.613 0.539780
## treatment4 -0.062109 0.026631 -2.332 0.019687 *
## treatment5 -0.048792 0.026431 -1.846 0.064887 .
## whole.mean -0.206148 0.059590 -3.459 0.000541 ***
## workers_alive 0.011857 0.010467 1.133 0.257301
## qroB3 -0.033837 0.028026 -1.207 0.227305
## qroB4 0.010593 0.029048 0.365 0.715359
## qroB5 0.051076 0.023569 2.167 0.030228 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 181.32 on 421 degrees of freedom
## Residual deviance: 143.80 on 412 degrees of freedom
## AIC: 2480.8
##
## Number of Fisher Scoring iterations: 4
emerge.gnb <- glm.nb(emerge_time ~ treatment + whole.mean + workers_alive + qro, data = drf.pollen)
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
summary(emerge.gnb)
##
## Call:
## glm.nb(formula = emerge_time ~ treatment + whole.mean + workers_alive +
## qro, data = drf.pollen, init.theta = 2094163.514, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.26940 -0.44081 0.00457 0.34338 1.62519
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.730413 0.056810 65.665 < 2e-16 ***
## treatment2 -0.004578 0.025688 -0.178 0.858548
## treatment3 0.016647 0.027150 0.613 0.539781
## treatment4 -0.062109 0.026631 -2.332 0.019688 *
## treatment5 -0.048792 0.026431 -1.846 0.064890 .
## whole.mean -0.206148 0.059591 -3.459 0.000541 ***
## workers_alive 0.011857 0.010468 1.133 0.257306
## qroB3 -0.033837 0.028027 -1.207 0.227309
## qroB4 0.010593 0.029048 0.365 0.715361
## qroB5 0.051076 0.023569 2.167 0.030229 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(2094164) family taken to be 1)
##
## Null deviance: 181.32 on 421 degrees of freedom
## Residual deviance: 143.80 on 412 degrees of freedom
## AIC: 2482.8
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 2094164
## Std. Err.: 16033036
## Warning while fitting theta: iteration limit reached
##
## 2 x log-likelihood: -2460.777
emerge.gn1 <- glm(emerge_time ~ treatment + whole.mean + workers_alive , data = drf.pollen, family = "poisson")
summary(emerge.gn1)
##
## Call:
## glm(formula = emerge_time ~ treatment + whole.mean + workers_alive,
## family = "poisson", data = drf.pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.27518 -0.42982 -0.01115 0.29792 1.88381
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.810843 0.044280 86.063 < 2e-16 ***
## treatment2 -0.004077 0.025450 -0.160 0.8727
## treatment3 0.021528 0.026975 0.798 0.4248
## treatment4 -0.049250 0.024871 -1.980 0.0477 *
## treatment5 -0.035474 0.025769 -1.377 0.1686
## whole.mean -0.206292 0.050452 -4.089 4.33e-05 ***
## workers_alive -0.005011 0.008082 -0.620 0.5353
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 181.32 on 421 degrees of freedom
## Residual deviance: 151.22 on 415 degrees of freedom
## AIC: 2482.2
##
## Number of Fisher Scoring iterations: 4
emerge.gn2 <- glm(emerge_time ~ treatment + whole.mean + workers_alive + qro, data = drf.pollen, family = "poisson")
summary(emerge.gn2)
##
## Call:
## glm(formula = emerge_time ~ treatment + whole.mean + workers_alive +
## qro, family = "poisson", data = drf.pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.26941 -0.44081 0.00457 0.34338 1.62521
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.730413 0.056809 65.665 < 2e-16 ***
## treatment2 -0.004578 0.025687 -0.178 0.858545
## treatment3 0.016647 0.027150 0.613 0.539780
## treatment4 -0.062109 0.026631 -2.332 0.019687 *
## treatment5 -0.048792 0.026431 -1.846 0.064887 .
## whole.mean -0.206148 0.059590 -3.459 0.000541 ***
## workers_alive 0.011857 0.010467 1.133 0.257301
## qroB3 -0.033837 0.028026 -1.207 0.227305
## qroB4 0.010593 0.029048 0.365 0.715359
## qroB5 0.051076 0.023569 2.167 0.030228 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 181.32 on 421 degrees of freedom
## Residual deviance: 143.80 on 412 degrees of freedom
## AIC: 2480.8
##
## Number of Fisher Scoring iterations: 4
emerge.gn3 <- glm(emerge_time ~ treatment + whole.mean + qro, data = drf.pollen, family = "poisson")
summary(emerge.gn3)
##
## Call:
## glm(formula = emerge_time ~ treatment + whole.mean + qro, family = "poisson",
## data = drf.pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.25365 -0.44810 0.01175 0.35429 1.71104
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.779508 0.036762 102.811 < 2e-16 ***
## treatment2 -0.003302 0.025662 -0.129 0.897620
## treatment3 0.022325 0.026694 0.836 0.402973
## treatment4 -0.053069 0.025397 -2.090 0.036658 *
## treatment5 -0.039700 0.025212 -1.575 0.115332
## whole.mean -0.200260 0.059425 -3.370 0.000752 ***
## qroB3 -0.031801 0.027974 -1.137 0.255613
## qroB4 0.003560 0.028375 0.125 0.900166
## qroB5 0.034807 0.018749 1.856 0.063393 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 181.32 on 421 degrees of freedom
## Residual deviance: 145.09 on 413 degrees of freedom
## AIC: 2480.1
##
## Number of Fisher Scoring iterations: 4
emerge.gn4 <- glm(emerge_time ~ treatment, data = drf.pollen, family = "poisson")
summary(emerge.gn4)
##
## Call:
## glm(formula = emerge_time ~ treatment, family = "poisson", data = drf.pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.42212 -0.40209 -0.06728 0.25644 2.19716
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.674316 0.017482 210.176 < 2e-16 ***
## treatment2 0.001985 0.025273 0.079 0.93741
## treatment3 0.004666 0.026233 0.178 0.85883
## treatment4 -0.065168 0.023636 -2.757 0.00583 **
## treatment5 -0.026259 0.024392 -1.077 0.28169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 181.32 on 421 degrees of freedom
## Residual deviance: 168.69 on 417 degrees of freedom
## AIC: 2495.7
##
## Number of Fisher Scoring iterations: 4
emerge.gnb1 <- glm.nb(emerge_time ~ treatment, data = drf.pollen)
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
summary(emerge.gnb1)
##
## Call:
## glm.nb(formula = emerge_time ~ treatment, data = drf.pollen,
## init.theta = 1732930.259, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.42211 -0.40208 -0.06728 0.25643 2.19713
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.674316 0.017482 210.174 < 2e-16 ***
## treatment2 0.001985 0.025273 0.079 0.93741
## treatment3 0.004666 0.026233 0.178 0.85883
## treatment4 -0.065168 0.023636 -2.757 0.00583 **
## treatment5 -0.026259 0.024392 -1.077 0.28170
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1732930) family taken to be 1)
##
## Null deviance: 181.32 on 421 degrees of freedom
## Residual deviance: 168.68 on 417 degrees of freedom
## AIC: 2497.7
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1732930
## Std. Err.: 12677401
## Warning while fitting theta: iteration limit reached
##
## 2 x log-likelihood: -2485.665
Anova(emerge.gnb)
## Analysis of Deviance Table (Type II tests)
##
## Response: emerge_time
## LR Chisq Df Pr(>Chisq)
## treatment 13.0324 4 0.011118 *
## whole.mean 11.9515 1 0.000546 ***
## workers_alive 1.2853 1 0.256911
## qro 7.4178 3 0.059708 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emerge.em <- emmeans(emerge.gnb, "treatment")
pairs(emerge.em)
## contrast estimate SE df z.ratio p.value
## treatment1 - treatment2 0.00458 0.0257 Inf 0.178 0.9998
## treatment1 - treatment3 -0.01665 0.0272 Inf -0.613 0.9731
## treatment1 - treatment4 0.06211 0.0266 Inf 2.332 0.1347
## treatment1 - treatment5 0.04879 0.0264 Inf 1.846 0.3470
## treatment2 - treatment3 -0.02123 0.0275 Inf -0.771 0.9389
## treatment2 - treatment4 0.05753 0.0265 Inf 2.168 0.1918
## treatment2 - treatment5 0.04421 0.0261 Inf 1.696 0.4364
## treatment3 - treatment4 0.07876 0.0261 Inf 3.023 0.0211
## treatment3 - treatment5 0.06544 0.0273 Inf 2.400 0.1152
## treatment4 - treatment5 -0.01332 0.0258 Inf -0.516 0.9858
##
## Results are averaged over the levels of: qro
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 5 estimates
hist(drf.pollen$radial)
shapiro.test(drf.pollen$radial)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$radial
## W = 0.98646, p-value = 0.0006803
drf.pollen <- na.omit(drf.pollen)
drf.pollen$logr <- log(drf.pollen$radial)
hist(drf.pollen$logr)
shapiro.test(drf.pollen$logr) # worse
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$logr
## W = 0.96301, p-value = 3.223e-08
range(drf.pollen$radial)
## [1] 1.6924 3.1542
hist(drf.pollen$radial)
shapiro.test(drf.pollen$radial)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$radial
## W = 0.9833, p-value = 0.000217
range(drf.pollen$radial)
## [1] 1.6924 3.1542
Data is normal enough, use lmer
rad.g1 <- lmer(radial~ treatment + whole.mean + workers_alive + alive + emerge_time + qro + (1|colony), data = drf.pollen)
summary(rad.g1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## radial ~ treatment + whole.mean + workers_alive + alive + emerge_time +
## qro + (1 | colony)
## Data: drf.pollen
##
## REML criterion at convergence: -118.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0129 -0.5602 0.0631 0.6033 3.6480
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 0.004667 0.06831
## Residual 0.035119 0.18740
## Number of obs: 381, groups: colony, 39
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.131450 0.232718 87.390136 9.159 2.05e-14 ***
## treatment2 -0.029660 0.048947 22.214703 -0.606 0.5507
## treatment3 -0.111055 0.051980 24.661311 -2.136 0.0428 *
## treatment4 0.024485 0.052514 22.856041 0.466 0.6455
## treatment5 -0.018120 0.051648 21.443459 -0.351 0.7291
## whole.mean 0.012284 0.122235 27.385521 0.100 0.9207
## workers_alive -0.006088 0.021341 20.302906 -0.285 0.7783
## aliveTRUE 0.284111 0.142949 328.809655 1.988 0.0477 *
## emerge_time 0.001807 0.003545 97.656369 0.510 0.6114
## qroB3 0.029097 0.053032 24.599687 0.549 0.5882
## qroB4 0.035231 0.058405 18.155479 0.603 0.5538
## qroB5 0.060713 0.048091 22.248301 1.262 0.2199
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtmn2 trtmn3 trtmn4 trtmn5 whl.mn wrkrs_ alTRUE emrg_t
## treatment2 -0.175
## treatment3 -0.083 0.491
## treatment4 -0.094 0.506 0.508
## treatment5 -0.105 0.527 0.493 0.529
## whole.mean -0.270 0.039 -0.113 -0.174 0.103
## workers_alv -0.363 -0.060 -0.163 -0.232 -0.284 -0.041
## aliveTRUE -0.595 0.043 0.106 0.022 -0.006 -0.133 0.015
## emerge_time -0.631 0.071 0.022 0.186 0.141 0.168 -0.082 0.019
## qroB3 -0.078 0.101 0.044 0.075 0.197 -0.022 -0.084 0.059 0.043
## qroB4 -0.011 0.046 0.028 0.205 -0.041 -0.512 0.221 0.035 -0.025
## qroB5 -0.195 0.066 -0.090 -0.136 -0.120 0.035 0.620 -0.038 -0.170
## qroB3 qroB4
## treatment2
## treatment3
## treatment4
## treatment5
## whole.mean
## workers_alv
## aliveTRUE
## emerge_time
## qroB3
## qroB4 0.158
## qroB5 0.141 0.277
Anova(rad.g1)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: radial
## Chisq Df Pr(>Chisq)
## treatment 7.8642 4 0.09668 .
## whole.mean 0.0101 1 0.91995
## workers_alive 0.0814 1 0.77543
## alive 3.9502 1 0.04687 *
## emerge_time 0.2598 1 0.61029
## qro 1.7816 3 0.61894
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(resid(rad.g1)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(rad.g1));qqline(resid(rad.g1))
rad.emm <- emmeans(rad.g1, "treatment")
pairs(rad.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.0297 0.0491 23.9 0.604 0.9732
## treatment1 - treatment3 0.1111 0.0523 26.5 2.122 0.2406
## treatment1 - treatment4 -0.0245 0.0528 24.6 -0.464 0.9899
## treatment1 - treatment5 0.0181 0.0518 23.0 0.350 0.9966
## treatment2 - treatment3 0.0814 0.0513 28.6 1.586 0.5182
## treatment2 - treatment4 -0.0541 0.0508 25.1 -1.066 0.8219
## treatment2 - treatment5 -0.0115 0.0491 23.2 -0.235 0.9993
## treatment3 - treatment4 -0.1355 0.0522 26.3 -2.597 0.0999
## treatment3 - treatment5 -0.0929 0.0525 26.8 -1.771 0.4102
## treatment4 - treatment5 0.0426 0.0508 22.2 0.839 0.9155
##
## Results are averaged over the levels of: alive, qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
summary(rad.emm)
## treatment emmean SE df lower.CL upper.CL
## 1 2.36 0.0802 166 2.20 2.51
## 2 2.33 0.0780 174 2.17 2.48
## 3 2.24 0.0761 223 2.09 2.39
## 4 2.38 0.0802 159 2.22 2.54
## 5 2.34 0.0811 155 2.18 2.50
##
## Results are averaged over the levels of: alive, qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
plot(rad.emm)
plot(rad.emm, comparisons = TRUE)
rad.sum <- drf.pollen %>%
group_by(treatment) %>%
summarise(rad.m = mean(radial),
sd.rad = sd(radial),
nrad = length(radial)) %>%
mutate(serad = sd.rad / sqrt(nrad))
plot(drf.pollen$treatment, drf.pollen$radial)
rad.sum
## # A tibble: 5 × 5
## treatment rad.m sd.rad nrad serad
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 2.52 0.163 75 0.0188
## 2 2 2.45 0.183 75 0.0212
## 3 3 2.37 0.245 59 0.0319
## 4 4 2.50 0.222 89 0.0236
## 5 5 2.47 0.166 83 0.0182
The blue bars on the plot emmeans are the confidence intervals. The red arrow lines represent a scheme to determine homogeneous groups. If the red lines overlap for two groups, then they are not significantly different using the method chosen.
Based on the diagnostic plots I am going to make the decision that this model fits pretty well.
hist(drf.pollen$relative_fat)
shapiro.test(drf.pollen$relative_fat)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$relative_fat
## W = 0.71283, p-value < 2.2e-16
range(drf.pollen$relative_fat)
## [1] 0.0002 0.0112
which(drf.pollen$relative_fat %in% c(0.0112)) # same problem drone as dry weights
## [1] 49
drf.pollen <- drf.pollen[-49,]
range(drf.pollen$relative_fat)
## [1] 0.0002 0.0072
shapiro.test(drf.pollen$relative_fat)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$relative_fat
## W = 0.80183, p-value < 2.2e-16
hist(drf.pollen$relative_fat)
drf.pollen$logrf <- log(drf.pollen$relative_fat)
shapiro.test(drf.pollen$logrf)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$logrf
## W = 0.93346, p-value = 5.464e-12
hist(drf.pollen$logrf) # this looks a bit *more* normal I guess
rfglmer <- lmer(logrf ~ treatment + whole.mean + workers_alive + emerge_time +alive + (1|colony), data = drf.pollen)
summary(rfglmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: logrf ~ treatment + whole.mean + workers_alive + emerge_time +
## alive + (1 | colony)
## Data: drf.pollen
##
## REML criterion at convergence: 464.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.9708 -0.4762 0.0323 0.4907 3.4507
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 0.01063 0.1031
## Residual 0.17813 0.4221
## Number of obs: 380, groups: colony, 39
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -7.767428 0.470304 116.474208 -16.516 < 2e-16 ***
## treatment2 -0.066065 0.090507 27.535639 -0.730 0.47159
## treatment3 -0.297527 0.097064 30.619527 -3.065 0.00451 **
## treatment4 -0.096246 0.092875 25.147664 -1.036 0.30992
## treatment5 0.143382 0.092757 26.612367 1.546 0.13397
## whole.mean 0.330950 0.194098 34.019854 1.705 0.09730 .
## workers_alive -0.006639 0.029831 23.774362 -0.223 0.82577
## emerge_time 0.012599 0.007105 77.233011 1.773 0.08012 .
## aliveTRUE 0.747127 0.312429 355.652387 2.391 0.01731 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtmn2 trtmn3 trtmn4 trtmn5 whl.mn wrkrs_ emrg_t
## treatment2 -0.165
## treatment3 -0.081 0.484
## treatment4 -0.137 0.527 0.503
## treatment5 -0.124 0.533 0.483 0.551
## whole.mean -0.282 0.068 -0.116 -0.040 0.131
## workers_alv -0.309 -0.102 -0.144 -0.219 -0.242 -0.050
## emerge_time -0.674 0.082 -0.010 0.177 0.129 0.254 0.047
## aliveTRUE -0.647 0.042 0.089 -0.002 -0.027 -0.130 0.061 -0.004
Anova(rfglmer)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: logrf
## Chisq Df Pr(>Chisq)
## treatment 22.2444 4 0.0001792 ***
## whole.mean 2.9073 1 0.0881819 .
## workers_alive 0.0495 1 0.8238715
## emerge_time 3.1446 1 0.0761793 .
## alive 5.7185 1 0.0167867 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(rfglmer)
plot(resid(rfglmer)) +
abline(h=0, col="red", lwd=2) #this looks good
## integer(0)
qqnorm(resid(rfglmer));qqline(resid(rfglmer)) # this does not :(
rf.sum <- drf.pollen %>%
group_by(treatment) %>%
summarise(mrf = mean(relative_fat),
sdrf = sd(relative_fat),
nrf = length(relative_fat)) %>%
mutate(serf = sdrf/sqrt(nrf))
rf.sum
## # A tibble: 5 × 5
## treatment mrf sdrf nrf serf
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 0.00199 0.00102 74 0.000119
## 2 2 0.00168 0.000559 75 0.0000645
## 3 3 0.00143 0.000582 59 0.0000758
## 4 4 0.00169 0.000755 89 0.0000800
## 5 5 0.00212 0.00110 83 0.000121
rfem <- emmeans(rfglmer, "treatment")
pairs(rfem)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.0661 0.0910 25.6 0.726 0.9486
## treatment1 - treatment3 0.2975 0.0978 28.4 3.041 0.0372
## treatment1 - treatment4 0.0962 0.0935 23.3 1.029 0.8394
## treatment1 - treatment5 -0.1434 0.0932 24.7 -1.539 0.5484
## treatment2 - treatment3 0.2315 0.0961 32.1 2.408 0.1390
## treatment2 - treatment4 0.0302 0.0897 25.6 0.336 0.9971
## treatment2 - treatment5 -0.2094 0.0888 26.6 -2.358 0.1584
## treatment3 - treatment4 -0.2013 0.0956 26.6 -2.105 0.2474
## treatment3 - treatment5 -0.4409 0.0972 29.3 -4.534 0.0008
## treatment4 - treatment5 -0.2396 0.0884 21.6 -2.709 0.0853
##
## Results are averaged over the levels of: alive
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
ggplot(data = rf.sum, aes(x = treatment, y = mrf, fill = treatment)) +
geom_col(position = "dodge", col = "black")+
coord_cartesian(ylim=c(0.0011, 0.0022)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = mrf - serf,
ymax = mrf + serf),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Relative Fat (g)",) +
ggtitle("Average Relative Fat (g) of Drones per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot() +
theme_classic(base_size = 12)
hist(drf.pollen$dry_weight)
shapiro.test(drf.pollen$dry_weight) # actually normal wow
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$dry_weight
## W = 0.99386, p-value = 0.1279
dry.g1 <- lmer(dry_weight~ treatment + whole.mean + workers_alive + alive + emerge_time + qro + (1|colony), data = drf.pollen)
plot(drf.pollen$treatment, drf.pollen$dry_weight)
summary(dry.g1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: dry_weight ~ treatment + whole.mean + workers_alive + alive +
## emerge_time + qro + (1 | colony)
## Data: drf.pollen
##
## REML criterion at convergence: -2472.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.66221 -0.60404 -0.01673 0.64927 3.09019
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 3.839e-06 0.001959
## Residual 5.988e-05 0.007738
## Number of obs: 380, groups: colony, 39
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.945e-02 8.827e-03 8.176e+01 3.336 0.00128 **
## treatment2 -3.700e-03 1.697e-03 2.035e+01 -2.180 0.04113 *
## treatment3 -6.636e-03 1.817e-03 2.230e+01 -3.653 0.00138 **
## treatment4 -1.241e-04 1.823e-03 2.052e+01 -0.068 0.94640
## treatment5 -7.731e-04 1.785e-03 1.968e+01 -0.433 0.66970
## whole.mean 3.347e-04 4.310e-03 2.530e+01 0.078 0.93870
## workers_alive -9.352e-04 7.319e-04 1.797e+01 -1.278 0.21761
## aliveTRUE 1.226e-02 5.761e-03 3.418e+02 2.128 0.03404 *
## emerge_time 1.229e-04 1.341e-04 6.380e+01 0.917 0.36275
## qroB3 -4.817e-04 1.857e-03 2.409e+01 -0.259 0.79758
## qroB4 3.762e-03 1.985e-03 1.607e+01 1.895 0.07621 .
## qroB5 8.546e-05 1.663e-03 1.970e+01 0.051 0.95954
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtmn2 trtmn3 trtmn4 trtmn5 whl.mn wrkrs_ alTRUE emrg_t
## treatment2 -0.178
## treatment3 -0.070 0.481
## treatment4 -0.100 0.504 0.507
## treatment5 -0.118 0.527 0.487 0.537
## whole.mean -0.262 0.042 -0.129 -0.180 0.114
## workers_alv -0.316 -0.054 -0.168 -0.249 -0.291 -0.047
## aliveTRUE -0.630 0.048 0.098 0.021 -0.006 -0.131 0.017
## emerge_time -0.626 0.077 0.013 0.204 0.160 0.189 -0.093 0.009
## qroB3 -0.097 0.113 0.049 0.080 0.208 0.003 -0.084 0.069 0.050
## qroB4 -0.011 0.058 0.037 0.220 -0.032 -0.512 0.219 0.037 -0.026
## qroB5 -0.160 0.065 -0.085 -0.149 -0.116 0.035 0.623 -0.031 -0.185
## qroB3 qroB4
## treatment2
## treatment3
## treatment4
## treatment5
## whole.mean
## workers_alv
## aliveTRUE
## emerge_time
## qroB3
## qroB4 0.153
## qroB5 0.145 0.279
Anova(dry.g1)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: dry_weight
## Chisq Df Pr(>Chisq)
## treatment 20.7427 4 0.0003561 ***
## whole.mean 0.0060 1 0.9380898
## workers_alive 1.6325 1 0.2013566
## alive 4.5288 1 0.0333296 *
## emerge_time 0.8403 1 0.3593005
## qro 4.0927 3 0.2516228
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(resid(dry.g1)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(dry.g1));qqline(resid(dry.g1)) #diagnostics look good
dry.emm <- emmeans(dry.g1, "treatment")
pairs(dry.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.003700 0.00171 22.7 2.166 0.2280
## treatment1 - treatment3 0.006636 0.00183 24.9 3.621 0.0105
## treatment1 - treatment4 0.000124 0.00184 22.9 0.068 1.0000
## treatment1 - treatment5 0.000773 0.00179 22.0 0.431 0.9923
## treatment2 - treatment3 0.002936 0.00181 27.3 1.623 0.4964
## treatment2 - treatment4 -0.003576 0.00177 23.6 -2.021 0.2869
## treatment2 - treatment5 -0.002927 0.00170 22.8 -1.721 0.4416
## treatment3 - treatment4 -0.006512 0.00182 24.4 -3.571 0.0120
## treatment3 - treatment5 -0.005863 0.00184 25.7 -3.188 0.0282
## treatment4 - treatment5 0.000649 0.00175 20.2 0.372 0.9956
##
## Results are averaged over the levels of: alive, qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(dry.emm, comparisons = TRUE)
dry.sum <- drf.pollen %>%
group_by(treatment) %>%
summarise(dry.m = mean(dry_weight),
dry.sd = sd(dry_weight),
dryn = length(dry_weight)) %>%
mutate(sedry = dry.sd/sqrt(dryn))
dry.sum
## # A tibble: 5 × 5
## treatment dry.m dry.sd dryn sedry
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 0.0445 0.00811 74 0.000942
## 2 2 0.0396 0.00836 75 0.000966
## 3 3 0.0365 0.00844 59 0.00110
## 4 4 0.0417 0.00845 89 0.000895
## 5 5 0.0422 0.00701 83 0.000770
ggplot(data = dry.sum, aes(x = treatment, y = dry.m, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(0.03, 0.05)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = dry.m - sedry,
ymax = dry.m + sedry),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Drone Dry Weight (g)",) +
ggtitle("Average Drone Dry Weight (g) per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 12)