Start by imputing pollen data and creating a new data frame with average pollen consumption on a per-colony basis
### Figure out average pollen consumption by treatment
pollen <- read_csv("pollen1.csv", col_types = cols(round = col_factor(levels = c("1",
"2")), treatment = col_factor(levels = c("1",
"2", "3", "4", "5")), replicate = col_factor(levels = c("1",
"2", "3", "4", "5", "6", "7", "9", "11",
"12")), start_date = col_date(format = "%m/%d/%Y"),
start_time = col_character(), end_date = col_date(format = "%m/%d/%Y"),
end_time = col_character()))
pollen$colony <- as.factor(pollen$colony)
pollen$pid <- as.factor(pollen$pid)
pollen$count <- as.factor(pollen$count)
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] -0.98780 1.56542
# get rid of negative numbers
pollen$difference[pollen$difference < 0] <- NA
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] 0.002715 1.565420
pollen$whole_dif <- as.numeric(pollen$difference)
wd.1 <- lm(difference ~ treatment, data = pollen)
summary(wd.1)
##
## Call:
## lm(formula = difference ~ treatment, data = pollen)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5268 -0.2476 -0.1363 0.1874 1.0576
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.460679 0.021143 21.789 < 2e-16 ***
## treatment2 0.047174 0.030208 1.562 0.118630
## treatment3 0.100480 0.029931 3.357 0.000812 ***
## treatment4 0.053390 0.029931 1.784 0.074703 .
## treatment5 -0.001879 0.030272 -0.062 0.950524
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3356 on 1231 degrees of freedom
## Multiple R-squared: 0.01281, Adjusted R-squared: 0.009604
## F-statistic: 3.994 on 4 and 1231 DF, p-value: 0.003177
wd.emm <- emmeans(wd.1, "treatment")
summary(wd.emm)
## treatment emmean SE df lower.CL upper.CL
## 1 0.461 0.0211 1231 0.419 0.502
## 2 0.508 0.0216 1231 0.466 0.550
## 3 0.561 0.0212 1231 0.520 0.603
## 4 0.514 0.0212 1231 0.473 0.556
## 5 0.459 0.0217 1231 0.416 0.501
##
## Confidence level used: 0.95
wd.summary <- pollen %>%
group_by(colony) %>%
summarize(whole.mean = mean(difference))
as.data.frame(wd.summary) # this is the data frame I will merge with subsequent data frames to contain average pollen consumption per colony as a new column
## colony whole.mean
## 1 1.11R2 0.2829509
## 2 1.12R2 0.1697964
## 3 1.1R1 0.8049667
## 4 1.1R2 0.5213458
## 5 1.2R1 0.4704294
## 6 1.2R2 0.3374200
## 7 1.3R1 0.3910053
## 8 1.3R2 0.4512891
## 9 1.4R2 0.6063016
## 10 1.5R2 0.7079516
## 11 1.7R2 0.7400381
## 12 1.9R2 0.2240081
## 13 2.11R2 0.4178270
## 14 2.12R2 0.4035568
## 15 2.1R1 0.7282895
## 16 2.1R2 0.6101589
## 17 2.2R1 0.4663045
## 18 2.2R2 0.5109234
## 19 2.3R1 0.4052000
## 20 2.3R2 0.3280036
## 21 2.4R2 0.3881394
## 22 2.5R2 0.7194915
## 23 2.7R2 0.5299685
## 24 2.9R2 0.5857152
## 25 3.11R2 0.4216410
## 26 3.12R2 0.3390993
## 27 3.1R1 0.8014682
## 28 3.1R2 0.3711948
## 29 3.2R1 0.8020500
## 30 3.2R2 0.3461010
## 31 3.3R1 0.5873429
## 32 3.3R2 0.8465806
## 33 3.4R2 0.4120433
## 34 3.5R2 0.8906211
## 35 3.7R2 0.6266680
## 36 3.9R2 0.4409511
## 37 4.11R2 0.3456011
## 38 4.12R2 0.2496295
## 39 4.1R1 0.8837867
## 40 4.1R2 0.7074755
## 41 4.2R1 0.3319238
## 42 4.2R2 0.3871742
## 43 4.3R1 0.3944500
## 44 4.3R2 0.5800074
## 45 4.4R2 0.8356247
## 46 4.5R2 0.2335967
## 47 4.7R2 0.6237400
## 48 4.9R2 0.5322950
## 49 5.11R2 0.4113960
## 50 5.12R2 0.2077741
## 51 5.1R1 0.6799737
## 52 5.1R2 0.3782286
## 53 5.2R1 0.7140056
## 54 5.2R2 0.4912468
## 55 5.3R1 0.2857654
## 56 5.3R2 0.2128778
## 57 5.4R2 0.4563045
## 58 5.5R2 0.7095479
## 59 5.7R2 0.6113189
## 60 5.9R2 0.4188290
drones <- read_csv("drones_rf.csv", col_types = cols(round = col_factor(levels = c("1","2")), treatment = col_factor(levels = c("1","2", "3", "4", "5")), notes = col_skip(), colony_start = col_skip(), colony_count = col_skip(), alive = col_skip(), emerge_date = col_skip()))
drones$order_on_sheet <- as.factor(drones$order_on_sheet)
drones$replicate <- as.factor(drones$replicate)
drones$colony <- as.factor(drones$colony)
drones$id <- as.factor(drones$id)
drones$relative_fat <- as.double(drones$relative_fat)
drones$radial <- as.double(drones$radial)
drones$`alive?` <- as.double(drones$`alive?`)
drf.pollen <- merge(wd.summary, drones, by.x = "colony") # this is a new data frame with average pollen consumption data per colony
drf.pollen$alive <- as.logical(drf.pollen$`alive?`)
drone.sum <- drf.pollen %>%
group_by(colony) %>%
summarise( count.drone = length(id))
drone.sum
## # A tibble: 54 × 2
## colony count.drone
## <fct> <int>
## 1 1.11R2 4
## 2 1.1R1 1
## 3 1.1R2 6
## 4 1.2R1 1
## 5 1.2R2 12
## 6 1.3R1 2
## 7 1.3R2 11
## 8 1.4R2 16
## 9 1.5R2 22
## 10 1.7R2 12
## # … with 44 more rows
drone.sum <- as.data.frame(drone.sum)
qro <- read_csv("qro.csv")
qro$colony <- as.factor(qro$colony)
qro$qro <- as.factor(qro$qro)
drf.pollen <- merge(drf.pollen, qro, by.x = "colony")
plot(drf.pollen$treatment, drf.pollen$emerge_time)
hist(drf.pollen$emerge_time)
emerge.gn <- glm(emerge_time ~ treatment + whole.mean + workers_alive + round + qro, data = drf.pollen, family = "poisson")
summary(emerge.gn) # underdispersed?
##
## Call:
## glm(formula = emerge_time ~ treatment + whole.mean + workers_alive +
## round + qro, family = "poisson", data = drf.pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.23141 -0.40861 -0.00276 0.33278 2.96846
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.785193 0.085306 44.372 < 2e-16 ***
## treatment2 0.002780 0.024700 0.113 0.91037
## treatment3 0.019619 0.026205 0.749 0.45407
## treatment4 -0.052105 0.025937 -2.009 0.04455 *
## treatment5 -0.040382 0.024897 -1.622 0.10481
## whole.mean -0.185521 0.056480 -3.285 0.00102 **
## workers_alive 0.009946 0.010105 0.984 0.32501
## round2 -0.063224 0.066156 -0.956 0.33923
## qroB3 -0.033022 0.027944 -1.182 0.23733
## qroB4 0.007622 0.028554 0.267 0.78952
## qroB5 0.049046 0.023344 2.101 0.03564 *
## qroK1 -0.040369 0.070402 -0.573 0.56637
## qroK2/K1 -0.151610 0.134680 -1.126 0.26029
## qroK3 -0.036130 0.174642 -0.207 0.83610
## qroK3/K2 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 198.74 on 468 degrees of freedom
## Residual deviance: 162.75 on 455 degrees of freedom
## AIC: 2766.3
##
## Number of Fisher Scoring iterations: 4
emerge.gnb <- glm.nb(emerge_time ~ treatment + whole.mean + workers_alive + round + qro, data = drf.pollen)
summary(emerge.gnb)
##
## Call:
## glm.nb(formula = emerge_time ~ treatment + whole.mean + workers_alive +
## round + qro, data = drf.pollen, init.theta = 2024608.931,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.23140 -0.40860 -0.00276 0.33278 2.96843
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.785193 0.085307 44.372 < 2e-16 ***
## treatment2 0.002781 0.024700 0.113 0.91037
## treatment3 0.019619 0.026206 0.749 0.45407
## treatment4 -0.052105 0.025937 -2.009 0.04455 *
## treatment5 -0.040382 0.024897 -1.622 0.10482
## whole.mean -0.185521 0.056481 -3.285 0.00102 **
## workers_alive 0.009946 0.010105 0.984 0.32502
## round2 -0.063224 0.066156 -0.956 0.33924
## qroB3 -0.033022 0.027945 -1.182 0.23733
## qroB4 0.007622 0.028554 0.267 0.78952
## qroB5 0.049046 0.023344 2.101 0.03564 *
## qroK1 -0.040369 0.070403 -0.573 0.56637
## qroK2/K1 -0.151610 0.134681 -1.126 0.26029
## qroK3 -0.036130 0.174644 -0.207 0.83610
## qroK3/K2 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(2024609) family taken to be 1)
##
## Null deviance: 198.74 on 468 degrees of freedom
## Residual deviance: 162.75 on 455 degrees of freedom
## AIC: 2768.3
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 2024609
## Std. Err.: 14525573
## Warning while fitting theta: iteration limit reached
##
## 2 x log-likelihood: -2738.282
emerge.gn1 <- glm(emerge_time ~ treatment + whole.mean + workers_alive + round, data = drf.pollen, family = "poisson")
summary(emerge.gn1)
##
## Call:
## glm(formula = emerge_time ~ treatment + whole.mean + workers_alive +
## round, family = "poisson", data = drf.pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.26850 -0.43520 -0.01592 0.33432 2.95424
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.815588 0.052952 72.057 < 2e-16 ***
## treatment2 0.001649 0.024369 0.068 0.9461
## treatment3 0.027795 0.025593 1.086 0.2775
## treatment4 -0.040113 0.024225 -1.656 0.0978 .
## treatment5 -0.030510 0.024366 -1.252 0.2105
## whole.mean -0.188356 0.047815 -3.939 8.17e-05 ***
## workers_alive -0.005665 0.007888 -0.718 0.4727
## round2 -0.017421 0.025477 -0.684 0.4941
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 198.74 on 468 degrees of freedom
## Residual deviance: 171.16 on 461 degrees of freedom
## AIC: 2762.7
##
## Number of Fisher Scoring iterations: 4
emerge.gn2 <- glm(emerge_time ~ treatment + whole.mean + workers_alive + qro, data = drf.pollen, family = "poisson")
summary(emerge.gn2)
##
## Call:
## glm(formula = emerge_time ~ treatment + whole.mean + workers_alive +
## qro, family = "poisson", data = drf.pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.23141 -0.40861 -0.00276 0.33278 2.96846
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.721969 0.055359 67.234 < 2e-16 ***
## treatment2 0.002780 0.024700 0.113 0.91037
## treatment3 0.019619 0.026205 0.749 0.45407
## treatment4 -0.052105 0.025937 -2.009 0.04455 *
## treatment5 -0.040382 0.024897 -1.622 0.10481
## whole.mean -0.185521 0.056480 -3.285 0.00102 **
## workers_alive 0.009946 0.010105 0.984 0.32501
## qroB3 -0.033022 0.027944 -1.182 0.23733
## qroB4 0.007622 0.028554 0.267 0.78952
## qroB5 0.049046 0.023344 2.101 0.03564 *
## qroK1 0.022855 0.030350 0.753 0.45142
## qroK2/K1 -0.088387 0.118021 -0.749 0.45391
## qroK3 0.027093 0.162039 0.167 0.86721
## qroK3/K2 0.063224 0.066156 0.956 0.33923
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 198.74 on 468 degrees of freedom
## Residual deviance: 162.75 on 455 degrees of freedom
## AIC: 2766.3
##
## Number of Fisher Scoring iterations: 4
emerge.gn3 <- glm(emerge_time ~ treatment + whole.mean + round + qro, data = drf.pollen, family = "poisson")
summary(emerge.gn3)
##
## Call:
## glm(formula = emerge_time ~ treatment + whole.mean + round +
## qro, family = "poisson", data = drf.pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.23606 -0.44454 0.00097 0.33316 2.96095
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.828068 0.073356 52.185 < 2e-16 ***
## treatment2 0.004152 0.024659 0.168 0.86627
## treatment3 0.024330 0.025772 0.944 0.34514
## treatment4 -0.044723 0.024822 -1.802 0.07158 .
## treatment5 -0.034408 0.024166 -1.424 0.15450
## whole.mean -0.181875 0.056413 -3.224 0.00126 **
## round2 -0.063783 0.066154 -0.964 0.33496
## qroB3 -0.031688 0.027914 -1.135 0.25630
## qroB4 0.001903 0.027958 0.068 0.94573
## qroB5 0.035262 0.018729 1.883 0.05973 .
## qroK1 -0.042958 0.070361 -0.611 0.54151
## qroK2/K1 -0.147606 0.134618 -1.096 0.27287
## qroK3 -0.048042 0.174220 -0.276 0.78274
## qroK3/K2 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 198.74 on 468 degrees of freedom
## Residual deviance: 163.72 on 456 degrees of freedom
## AIC: 2765.2
##
## Number of Fisher Scoring iterations: 4
emerge.gn4 <- glm(emerge_time ~ treatment, data = drf.pollen, family = "poisson")
summary(emerge.gn4)
##
## Call:
## glm(formula = emerge_time ~ treatment, family = "poisson", data = drf.pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.43326 -0.39314 -0.04224 0.27845 2.72928
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.670317 0.017110 214.517 <2e-16 ***
## treatment2 0.004544 0.024101 0.189 0.8505
## treatment3 0.010505 0.024574 0.427 0.6690
## treatment4 -0.055026 0.023134 -2.379 0.0174 *
## treatment5 -0.025789 0.023430 -1.101 0.2710
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 198.74 on 468 degrees of freedom
## Residual deviance: 187.44 on 464 degrees of freedom
## AIC: 2773
##
## Number of Fisher Scoring iterations: 4
emerge.gnb1 <- glm.nb(emerge_time ~ treatment, data = drf.pollen)
summary(emerge.gnb1)
##
## Call:
## glm.nb(formula = emerge_time ~ treatment, data = drf.pollen,
## init.theta = 1721174.009, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.43325 -0.39314 -0.04224 0.27844 2.72924
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.670317 0.017110 214.515 <2e-16 ***
## treatment2 0.004544 0.024101 0.189 0.8505
## treatment3 0.010505 0.024574 0.427 0.6690
## treatment4 -0.055026 0.023134 -2.379 0.0174 *
## treatment5 -0.025789 0.023430 -1.101 0.2710
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1721174) family taken to be 1)
##
## Null deviance: 198.74 on 468 degrees of freedom
## Residual deviance: 187.44 on 464 degrees of freedom
## AIC: 2775
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1721174
## Std. Err.: 11896922
## Warning while fitting theta: iteration limit reached
##
## 2 x log-likelihood: -2762.974
Anova(emerge.gnb)
## Analysis of Deviance Table (Type II tests)
##
## Response: emerge_time
## LR Chisq Df Pr(>Chisq)
## treatment 12.0116 4 0.017265 *
## whole.mean 10.7606 1 0.001037 **
## workers_alive 0.9700 1 0.324672
## round 0
## qro 8.4107 6 0.209528
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emerge.em <- emmeans(emerge.gnb, "treatment")
pairs(emerge.em)
## contrast estimate SE df z.ratio p.value
## treatment1 - treatment2 -0.00278 0.0247 Inf -0.113 1.0000
## treatment1 - treatment3 -0.01962 0.0262 Inf -0.749 0.9449
## treatment1 - treatment4 0.05210 0.0259 Inf 2.009 0.2616
## treatment1 - treatment5 0.04038 0.0249 Inf 1.622 0.4832
## treatment2 - treatment3 -0.01684 0.0259 Inf -0.651 0.9665
## treatment2 - treatment4 0.05489 0.0252 Inf 2.174 0.1895
## treatment2 - treatment5 0.04316 0.0241 Inf 1.793 0.3778
## treatment3 - treatment4 0.07172 0.0250 Inf 2.873 0.0331
## treatment3 - treatment5 0.06000 0.0255 Inf 2.354 0.1282
## treatment4 - treatment5 -0.01172 0.0246 Inf -0.477 0.9895
##
## Results are averaged over the levels of: qro, round
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 5 estimates
hist(drf.pollen$radial)
shapiro.test(drf.pollen$radial)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$radial
## W = 0.98605, p-value = 0.0002584
drf.pollen <- na.omit(drf.pollen)
drf.pollen$logr <- log(drf.pollen$radial)
hist(drf.pollen$logr)
shapiro.test(drf.pollen$logr) # worse
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$logr
## W = 0.96195, p-value = 6.173e-09
range(drf.pollen$radial)
## [1] 1.6924 3.1542
hist(drf.pollen$radial)
shapiro.test(drf.pollen$radial)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$radial
## W = 0.98282, p-value = 7.268e-05
range(drf.pollen$radial)
## [1] 1.6924 3.1542
Data is normal enough, use lmer
rad.g1 <- lmer(radial~ treatment + whole.mean + workers_alive + alive + emerge_time + round + qro + (1|colony), data = drf.pollen)
summary(rad.g1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## radial ~ treatment + whole.mean + workers_alive + alive + emerge_time +
## round + qro + (1 | colony)
## Data: drf.pollen
##
## REML criterion at convergence: -126.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9837 -0.5509 0.0389 0.5956 3.5865
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 0.007254 0.08517
## Residual 0.034378 0.18541
## Number of obs: 418, groups: colony, 51
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.366310 0.253370 61.702390 9.339 2.05e-13 ***
## treatment2 -0.029409 0.051845 25.039638 -0.567 0.5756
## treatment3 -0.111904 0.055361 28.026629 -2.021 0.0529 .
## treatment4 -0.034054 0.055458 27.002621 -0.614 0.5443
## treatment5 -0.038272 0.053243 25.221179 -0.719 0.4789
## whole.mean 0.145428 0.122057 32.847978 1.191 0.2420
## workers_alive 0.001636 0.023319 22.012207 0.070 0.9447
## aliveTRUE 0.209244 0.103027 372.383541 2.031 0.0430 *
## emerge_time 0.001515 0.003400 148.506085 0.446 0.6565
## round2 -0.238790 0.127626 36.085475 -1.871 0.0695 .
## qroB3 0.019311 0.059818 23.718156 0.323 0.7497
## qroB4 -0.003562 0.065061 19.690133 -0.055 0.9569
## qroB5 0.077464 0.053741 23.247978 1.441 0.1628
## qroK1 -0.197545 0.134460 38.392230 -1.469 0.1499
## qroK2/K1 0.048180 0.204870 63.409058 0.235 0.8148
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
Anova(rad.g1)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: radial
## Chisq Df Pr(>Chisq)
## treatment 4.4362 4 0.35018
## whole.mean 1.4196 1 0.23347
## workers_alive 0.0049 1 0.94405
## alive 4.1248 1 0.04226 *
## emerge_time 0.1986 1 0.65582
## round 3.5007 1 0.06134 .
## qro 6.2139 5 0.28596
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(resid(rad.g1)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(rad.g1));qqline(resid(rad.g1))
rad.emm <- emmeans(rad.g1, "treatment")
pairs(rad.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.02941 0.0521 31.0 0.565 0.9792
## treatment1 - treatment3 0.11190 0.0557 34.6 2.009 0.2831
## treatment1 - treatment4 0.03405 0.0558 33.4 0.610 0.9725
## treatment1 - treatment5 0.03827 0.0535 31.2 0.716 0.9513
## treatment2 - treatment3 0.08249 0.0542 34.4 1.521 0.5563
## treatment2 - treatment4 0.00464 0.0533 32.0 0.087 1.0000
## treatment2 - treatment5 0.00886 0.0509 28.8 0.174 0.9998
## treatment3 - treatment4 -0.07785 0.0555 33.9 -1.402 0.6303
## treatment3 - treatment5 -0.07363 0.0550 33.5 -1.338 0.6703
## treatment4 - treatment5 0.00422 0.0533 29.9 0.079 1.0000
##
## Results are averaged over the levels of: alive, qro, round
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
summary(rad.emm)
## treatment emmean SE df lower.CL upper.CL
## 1 2.49 0.0716 118 2.35 2.63
## 2 2.46 0.0690 135 2.32 2.60
## 3 2.38 0.0664 146 2.25 2.51
## 4 2.45 0.0744 108 2.31 2.60
## 5 2.45 0.0744 112 2.30 2.60
##
## Results are averaged over the levels of: alive, qro, round
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
plot(rad.emm, comparisons = TRUE)
rad.sum <- drf.pollen %>%
group_by(treatment) %>%
summarise(rad.m = mean(radial),
sd.rad = sd(radial),
nrad = length(radial))
plot(drf.pollen$treatment, drf.pollen$radial)
rad.sum
## # A tibble: 5 × 4
## treatment rad.m sd.rad nrad
## <fct> <dbl> <dbl> <int>
## 1 1 2.52 0.166 79
## 2 2 2.47 0.184 86
## 3 3 2.40 0.243 69
## 4 4 2.49 0.232 92
## 5 5 2.47 0.167 92
The blue bars on the plot emmeans are the confidence intervals. The red arrow lines represent a scheme to determine homogeneous groups. If the red lines overlap for two groups, then they are not significantly different using the method chosen.
Based on the diagnostic plots I am going to make the decision that this model fits pretty well.
hist(drf.pollen$relative_fat)
shapiro.test(drf.pollen$relative_fat)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$relative_fat
## W = 0.72228, p-value < 2.2e-16
range(drf.pollen$relative_fat)
## [1] 0.0002 0.0112
which(drf.pollen$relative_fat %in% c(0.0112)) # same problem drone as dry weights
## [1] 63
drf.pollen <- drf.pollen[-61,]
range(drf.pollen$relative_fat)
## [1] 0.0002 0.0112
shapiro.test(drf.pollen$relative_fat)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$relative_fat
## W = 0.72128, p-value < 2.2e-16
hist(drf.pollen$relative_fat)
drf.pollen$logrf <- log(drf.pollen$relative_fat)
shapiro.test(drf.pollen$logrf)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$logrf
## W = 0.94399, p-value = 1.912e-11
hist(drf.pollen$logrf) # this looks a bit *more* normal I guess, I'm going to stick with glmer though
rfglmer <- glmer(logrf ~ treatment + whole.mean + workers_alive + emerge_time + round +alive + (1|colony), data = drf.pollen)
summary(rfglmer)
## Linear mixed model fit by REML ['lmerMod']
## Formula: logrf ~ treatment + whole.mean + workers_alive + emerge_time +
## round + alive + (1 | colony)
## Data: drf.pollen
##
## REML criterion at convergence: 509.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.9606 -0.4851 0.0528 0.4817 4.0401
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 0.0137 0.1170
## Residual 0.1758 0.4193
## Number of obs: 417, groups: colony, 51
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -8.227893 0.412260 -19.958
## treatment2 -0.062856 0.089155 -0.705
## treatment3 -0.270002 0.094969 -2.843
## treatment4 -0.099254 0.093154 -1.065
## treatment5 0.111941 0.090718 1.234
## whole.mean 0.237731 0.186112 1.277
## workers_alive -0.001905 0.030711 -0.062
## emerge_time 0.010166 0.006689 1.520
## round2 0.431498 0.089127 4.841
## aliveTRUE 0.907953 0.222761 4.076
##
## Correlation of Fixed Effects:
## (Intr) trtmn2 trtmn3 trtmn4 trtmn5 whl.mn wrkrs_ emrg_t round2
## treatment2 -0.114
## treatment3 -0.022 0.511
## treatment4 -0.068 0.532 0.507
## treatment5 -0.103 0.550 0.504 0.544
## whole.mean -0.337 0.036 -0.139 -0.064 0.067
## workers_alv -0.385 -0.114 -0.148 -0.208 -0.185 -0.021
## emerge_time -0.720 0.048 -0.055 0.113 0.094 0.189 0.052
## round2 -0.257 0.064 0.035 -0.049 0.042 0.181 0.108 0.077
## aliveTRUE -0.490 -0.035 0.043 -0.049 -0.072 -0.115 0.063 -0.006 -0.129
Anova(rfglmer)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: logrf
## Chisq Df Pr(>Chisq)
## treatment 18.3306 4 0.001063 **
## whole.mean 1.6316 1 0.201477
## workers_alive 0.0038 1 0.950538
## emerge_time 2.3100 1 0.128545
## round 23.4392 1 1.289e-06 ***
## alive 16.6129 1 4.584e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(rfglmer)
plot(resid(rfglmer)) +
abline(h=0, col="red", lwd=2) #this looks good
## integer(0)
qqnorm(resid(rfglmer));qqline(resid(rfglmer)) # this does not :(
rf.sum <- drf.pollen %>%
group_by(treatment) %>%
summarise(mrf = mean(relative_fat),
sdrf = sd(relative_fat),
nrf = length(relative_fat)) %>%
mutate(serf = sdrf/sqrt(nrf))
rf.sum
## # A tibble: 5 × 5
## treatment mrf sdrf nrf serf
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 0.00204 0.00147 78 0.000167
## 2 2 0.00161 0.000562 86 0.0000606
## 3 3 0.00137 0.000575 69 0.0000692
## 4 4 0.00167 0.000749 92 0.0000781
## 5 5 0.00203 0.00109 92 0.000114
rfem <- emmeans(rfglmer, "treatment")
pairs(rfem)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.0629 0.0898 32.6 0.700 0.9550
## treatment1 - treatment3 0.2700 0.0958 36.8 2.820 0.0559
## treatment1 - treatment4 0.0993 0.0939 30.1 1.056 0.8269
## treatment1 - treatment5 -0.1119 0.0913 31.6 -1.226 0.7363
## treatment2 - treatment3 0.2071 0.0918 39.2 2.256 0.1811
## treatment2 - treatment4 0.0364 0.0888 31.4 0.410 0.9938
## treatment2 - treatment5 -0.1748 0.0857 32.1 -2.040 0.2706
## treatment3 - treatment4 -0.1707 0.0942 33.7 -1.813 0.3833
## treatment3 - treatment5 -0.3819 0.0931 36.6 -4.102 0.0019
## treatment4 - treatment5 -0.2112 0.0884 27.3 -2.390 0.1484
##
## Results are averaged over the levels of: round, alive
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
ggplot(data = rf.sum, aes(x = treatment, y = mrf, fill = treatment)) +
geom_col(position = "dodge", col = "black")+
coord_cartesian(ylim=c(0.0011, 0.0022)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = mrf - serf,
ymax = mrf + serf),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Relative Fat (g)",) +
ggtitle("Average Relative Fat (g) of Drones per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot() +
theme_classic(base_size = 12)
## Drone Dry Weight
hist(drf.pollen$dry_weight)
shapiro.test(drf.pollen$dry_weight) # actually normal wow
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$dry_weight
## W = 0.27303, p-value < 2.2e-16
dry.g1 <- lmer(dry_weight~ treatment + whole.mean + workers_alive + alive + emerge_time + round + qro + (1|colony), data = drf.pollen)
plot(drf.pollen$treatment, drf.pollen$dry_weight)
summary(dry.g1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: dry_weight ~ treatment + whole.mean + workers_alive + alive +
## emerge_time + round + qro + (1 | colony)
## Data: drf.pollen
##
## REML criterion at convergence: -1880.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.1372 -0.2936 -0.0309 0.2002 18.3707
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 0.0000000 0.00000
## Residual 0.0004733 0.02175
## Number of obs: 417, groups: colony, 51
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.241e-02 2.131e-02 4.020e+02 1.990 0.04730 *
## treatment2 -8.049e-03 3.485e-03 4.020e+02 -2.309 0.02144 *
## treatment3 -1.107e-02 3.820e-03 4.020e+02 -2.899 0.00395 **
## treatment4 -5.276e-03 3.767e-03 4.020e+02 -1.401 0.16208
## treatment5 -6.405e-03 3.593e-03 4.020e+02 -1.783 0.07541 .
## whole.mean -7.195e-04 8.878e-03 4.020e+02 -0.081 0.93545
## workers_alive -5.582e-04 1.490e-03 4.020e+02 -0.375 0.70817
## aliveTRUE 1.405e-02 1.128e-02 4.020e+02 1.245 0.21380
## emerge_time -1.499e-04 2.987e-04 4.020e+02 -0.502 0.61596
## round2 -1.046e-03 1.024e-02 4.020e+02 -0.102 0.91865
## qroB3 -1.756e-03 4.088e-03 4.020e+02 -0.430 0.66775
## qroB4 9.777e-03 4.021e-03 4.020e+02 2.432 0.01547 *
## qroB5 5.083e-04 3.497e-03 4.020e+02 0.145 0.88450
## qroK1 -2.272e-03 1.095e-02 4.020e+02 -0.207 0.83576
## qroK2/K1 5.735e-04 1.866e-02 4.020e+02 0.031 0.97550
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Anova(dry.g1)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: dry_weight
## Chisq Df Pr(>Chisq)
## treatment 9.8161 4 0.04364 *
## whole.mean 0.0066 1 0.93541
## workers_alive 0.1403 1 0.70798
## alive 1.5504 1 0.21308
## emerge_time 0.2520 1 0.61569
## round 0.0104 1 0.91860
## qro 7.0617 5 0.21609
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(resid(dry.g1)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(dry.g1));qqline(resid(dry.g1)) #diagnostics look good
dry.emm <- emmeans(dry.g1, "treatment")
pairs(dry.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.00805 0.00354 22.9 2.273 0.1898
## treatment1 - treatment3 0.01107 0.00389 24.9 2.851 0.0603
## treatment1 - treatment4 0.00528 0.00382 23.5 1.380 0.6457
## treatment1 - treatment5 0.00640 0.00365 23.7 1.757 0.4207
## treatment2 - treatment3 0.00303 0.00382 27.4 0.793 0.9305
## treatment2 - treatment4 -0.00277 0.00364 24.1 -0.761 0.9392
## treatment2 - treatment5 -0.00164 0.00343 26.1 -0.479 0.9887
## treatment3 - treatment4 -0.00580 0.00383 23.5 -1.513 0.5645
## treatment3 - treatment5 -0.00467 0.00385 26.9 -1.212 0.7443
## treatment4 - treatment5 0.00113 0.00353 19.6 0.320 0.9975
##
## Results are averaged over the levels of: alive, qro, round
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(dry.emm, comparisons = TRUE)
dry.sum <- drf.pollen %>%
group_by(treatment) %>%
summarise(dry.m = mean(dry_weight),
dry.sd = sd(dry_weight),
dryn = length(dry_weight)) %>%
mutate(sedry = dry.sd/sqrt(dryn))
dry.sum
## # A tibble: 5 × 5
## treatment dry.m dry.sd dryn sedry
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 0.0489 0.0476 78 0.00539
## 2 2 0.0397 0.00824 86 0.000889
## 3 3 0.0369 0.00837 69 0.00101
## 4 4 0.0413 0.00873 92 0.000910
## 5 5 0.0417 0.00717 92 0.000748
ggplot(data = dry.sum, aes(x = treatment, y = dry.m, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(0.03, 0.05)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = dry.m - sedry,
ymax = dry.m + sedry),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Drone Dry Weight (g)",) +
ggtitle("Average Drone Dry Weight (g) per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 12)