Bees are frequently exposed to fungicides in agricultural landscapes, and while these chemicals are generally not considered to be harmful to insect pollinators, the sublethal effects of fungicides are not well understood. We investigated the non-target effects of exposure to field-realistic concentrations of the fungicide, Pristine®, for the common eastern bumble bee (Bombus impatiens). We fed queenless microcolonies of 5 bumble bees pollen patties contaminated with Pristine® ranging from 0 ppb-150,000 ppb, and evaluated the effects of chronic fungicide exposure on brood production, colony weight change, pollen consumption, and drone body quality. We found that while microcolonies fed pollen containing 1,500 ppb consumed the most pollen and produced the most brood, microcolonies exposed to 15,000 ppb gained weight at significantly lower rates. Fungicide exposure also negatively impacted brood development time and drone body quality. Our findings indicate that Pristine® has sublethal effects on bumble bee microcolony growth and brood development, highlighting the potential negative impacts of chronic fungicide exposure to bee health. In the face of the global decline of pollinator species, it is important for us to broaden our understanding of how bees react towards exposure of often overlooked stressors, such as fungicides, to account for potential consequences for pollinator health and pollination services.
### Figure out average pollen consumption by treatment
pollen <- read_csv("pollen1.csv", col_types = cols(round = col_factor(levels = c("1",
"2")), treatment = col_factor(levels = c("1",
"2", "3", "4", "5")), replicate = col_factor(levels = c("1",
"2", "3", "4", "5", "6", "7", "9", "11",
"12")), start_date = col_date(format = "%m/%d/%Y"),
start_time = col_character(), end_date = col_date(format = "%m/%d/%Y"),
end_time = col_character()))
pollen$colony <- as.factor(pollen$colony)
pollen$pid <- as.factor(pollen$pid)
pollen$count <- as.factor(pollen$count)
pollen <- subset(pollen, pollen$round != 1)
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] -0.30646 1.56542
# get rid of negative numbers
pollen$difference[pollen$difference < 0] <- NA
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] 0.002715 1.565420
# add queenright original colony column
qro <- read_csv("qro.csv")
qro$colony <- as.factor(qro$colony)
qro$qro <- as.factor(qro$qro)
pollen <- merge(pollen, qro, by.x = "colony")
pollen$pid <- as.factor(pollen$pid)
pollen$dose_consumed <- as.numeric(pollen$dose_consumed)
pollen$whole_dif <- as.numeric(pollen$difference)
wd.1 <- lm(difference ~ treatment, data = pollen)
summary(wd.1)
##
## Call:
## lm(formula = difference ~ treatment, data = pollen)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4853 -0.2416 -0.1504 0.1576 1.0638
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.429541 0.024170 17.772 <2e-16 ***
## treatment2 0.072099 0.034886 2.067 0.0390 *
## treatment3 0.078078 0.034406 2.269 0.0235 *
## treatment4 0.058490 0.034988 1.672 0.0949 .
## treatment5 0.004982 0.035040 0.142 0.8870
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3375 on 915 degrees of freedom
## Multiple R-squared: 0.009927, Adjusted R-squared: 0.005599
## F-statistic: 2.294 on 4 and 915 DF, p-value: 0.05773
wd.emm <- emmeans(wd.1, "treatment")
summary(wd.emm)
## treatment emmean SE df lower.CL upper.CL
## 1 0.430 0.0242 915 0.382 0.477
## 2 0.502 0.0252 915 0.452 0.551
## 3 0.508 0.0245 915 0.460 0.556
## 4 0.488 0.0253 915 0.438 0.538
## 5 0.435 0.0254 915 0.385 0.484
##
## Confidence level used: 0.95
wd.summary <- pollen %>%
group_by(colony) %>%
summarize(whole.mean = mean(difference),
mean.dose = mean(dose_consumed))
as.data.frame(wd.summary) # this is the data frame I will merge with subsequent data frames to contain average pollen consumption per colony as a new column
## colony whole.mean mean.dose
## 1 1.11R2 0.2829509 0.00000
## 2 1.12R2 0.1697964 0.00000
## 3 1.1R2 0.5213458 0.00000
## 4 1.2R2 0.3374200 0.00000
## 5 1.3R2 0.4512891 0.00000
## 6 1.4R2 0.6063016 0.00000
## 7 1.5R2 0.7079516 0.00000
## 8 1.7R2 0.7400381 0.00000
## 9 1.9R2 0.2240081 0.00000
## 10 2.11R2 0.4178270 35.99958
## 11 2.12R2 0.4035568 35.62453
## 12 2.1R2 0.6101589 55.13368
## 13 2.2R2 0.5109234 47.65174
## 14 2.3R2 0.3280036 24.29676
## 15 2.4R2 0.3881394 32.86886
## 16 2.5R2 0.7194915 73.77696
## 17 2.7R2 0.5299685 46.45851
## 18 2.9R2 0.5857152 52.49618
## 19 3.11R2 0.4216410 357.65779
## 20 3.12R2 0.3390993 281.21803
## 21 3.1R2 0.3711948 307.88606
## 22 3.2R2 0.3461010 310.28287
## 23 3.3R2 0.8465806 919.09823
## 24 3.4R2 0.4120433 406.20875
## 25 3.5R2 0.8906211 969.89690
## 26 3.7R2 0.6266680 720.47811
## 27 3.9R2 0.4409511 417.98849
## 28 4.11R2 0.3456011 3697.92253
## 29 4.12R2 0.2496295 2085.61694
## 30 4.1R2 0.7074755 8936.65823
## 31 4.2R2 0.3871742 4380.60484
## 32 4.3R2 0.5800074 6846.91060
## 33 4.4R2 0.8356247 10230.80238
## 34 4.5R2 0.2335967 1757.69297
## 35 4.7R2 0.6237400 7030.06345
## 36 4.9R2 0.5322950 5682.26011
## 37 5.11R2 0.4113960 43070.09972
## 38 5.12R2 0.2077741 15383.81515
## 39 5.1R2 0.3782286 37367.48316
## 40 5.2R2 0.4912468 52985.85612
## 41 5.3R2 0.2128778 16364.60071
## 42 5.4R2 0.4563045 48624.01684
## 43 5.5R2 0.7095479 82144.93157
## 44 5.7R2 0.6113189 67415.74103
## 45 5.9R2 0.4188290 42601.27230
mortality <- read_csv("surviving workers per colony.csv")
mortality$colony <- as.factor(mortality$colony)
brood <- read_csv("brood.csv", col_types = cols(round = col_factor(levels = c("1",
"2")), treatment = col_factor(levels = c("1",
"2", "3", "4", "5")), replicate = col_factor(levels = c("1",
"2", "3", "4", "5", "7", "9", "11", "12"))))
brood$colony <- as.factor(brood$colony)
brood <- subset(brood, brood$round != 1)
brood <- merge(wd.summary, brood, by.x = "colony")
brood <- merge(brood, mortality, by.x = "colony")
brood.sub <- subset(brood, select = c(colony, duration))
drones <- read_csv("drones_rf.csv", col_types = cols(round = col_factor(levels = c("1","2")), treatment = col_factor(levels = c("1","2", "3", "4", "5")), notes = col_skip(), colony_start = col_skip(), colony_count = col_skip(), alive = col_skip(), emerge_date = col_skip()))
#this data set is for drone emerge times and health metrics
drones$order_on_sheet <- as.factor(drones$order_on_sheet)
drones$replicate <- as.factor(drones$replicate)
drones$colony <- as.factor(drones$colony)
drones$id <- as.factor(drones$id)
drones$relative_fat <- as.double(drones$relative_fat)
drones$radial <- as.double(drones$radial)
drones$`alive?` <- as.double(drones$`alive?`)
drf.pollen <- merge(wd.summary, drones, by.x = "colony") # this is a new data frame with average pollen consumption data per colony
drf.pollen$alive <- as.logical(drf.pollen$`alive?`)
drone.sum <- drf.pollen %>%
group_by(colony) %>%
summarise(count.drone = length(id))
drone.sum
## # A tibble: 40 × 2
## colony count.drone
## <fct> <int>
## 1 1.11R2 4
## 2 1.1R2 6
## 3 1.2R2 12
## 4 1.3R2 11
## 5 1.4R2 16
## 6 1.5R2 21
## 7 1.7R2 12
## 8 2.11R2 9
## 9 2.12R2 5
## 10 2.1R2 11
## # … with 30 more rows
drone.sum <- as.data.frame(drone.sum)
qro <- read_csv("qro.csv")
qro$colony <- as.factor(qro$colony)
qro$qro <- as.factor(qro$qro)
drf.pollen <- merge(drf.pollen, qro, by.x = "colony")
drf.pollen<- merge(drf.pollen, brood.sub, by.x = "colony")
range(drf.pollen$dry_weight)
## [1] 0.0166 0.0826
drf.pollen.dry <- drf.pollen[-c(131, 160, 177), ] #remove outlier that was covered in honey and dead bees
range(drf.pollen$dry_weight)
## [1] 0.0166 0.0826
drf.pollen.dry <- subset(drf.pollen.dry, select = c(dry_weight, treatment, whole.mean, mean.dose, workers_alive, alive, duration, qro, colony))
drf.pollen.dry <- na.omit(drf.pollen.dry)
ggplot(drf.pollen.dry, aes(x=dry_weight, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.0025,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Dry Weight") +
labs(y = "Count", x = "Weight (g) ")
shapiro.test(drf.pollen.dry$dry_weight)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen.dry$dry_weight
## W = 0.99416, p-value = 0.1154
dry.model <- lm(dry_weight~ treatment + whole.mean + mean.dose + workers_alive + duration + qro, data = drf.pollen)
ols_vif_tol(dry.model)
## Variables Tolerance VIF
## 1 treatment2 0.50260697 1.989626
## 2 treatment3 0.59719170 1.674504
## 3 treatment4 0.40461328 2.471496
## 4 treatment5 0.07461722 13.401732
## 5 whole.mean 0.50173020 1.993103
## 6 mean.dose 0.09232082 10.831793
## 7 workers_alive 0.49903846 2.003854
## 8 duration 0.69478708 1.439290
## 9 qroB3 0.78901120 1.267409
## 10 qroB4 0.55986224 1.786154
## 11 qroB5 0.50314078 1.987515
vif(dry.model)
## GVIF Df GVIF^(1/(2*Df))
## treatment 19.152334 4 1.446364
## whole.mean 1.993103 1 1.411773
## mean.dose 10.831793 1 3.291169
## workers_alive 2.003854 1 1.415575
## duration 1.439290 1 1.199704
## qro 3.084743 3 1.206526
dry.mod1 <- update(dry.model, .~. - mean.dose)
vif(dry.mod1)
## GVIF Df GVIF^(1/(2*Df))
## treatment 2.032114 4 1.092681
## whole.mean 1.691258 1 1.300484
## workers_alive 1.982161 1 1.407892
## duration 1.386150 1 1.177349
## qro 2.905325 3 1.194536
dry.mod2 <- update(dry.model, .~. - whole.mean)
vif(dry.mod2)
## GVIF Df GVIF^(1/(2*Df))
## treatment 13.450555 4 1.383861
## mean.dose 9.191372 1 3.031728
## workers_alive 1.985847 1 1.409201
## duration 1.288983 1 1.135334
## qro 2.494860 3 1.164594
dry.mod3 <- update(dry.model, .~. - treatment)
vif(dry.mod3)
## GVIF Df GVIF^(1/(2*Df))
## whole.mean 1.399743 1 1.183107
## mean.dose 1.149282 1 1.072046
## workers_alive 1.808411 1 1.344772
## duration 1.137360 1 1.066471
## qro 2.347073 3 1.152801
# This shows that treatment and mean.dose are highly correlated
pair.dry.df <- subset(drf.pollen, select = c(dry_weight, treatment, whole.mean, mean.dose, workers_alive, alive, duration, qro)) #make a data frame containing only the explanatory variables in the model
ggpairs(pair.dry.df)
dry.int <- lmer(dry_weight~ treatment*whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.dry)
dry.g1 <- lmer(dry_weight~ treatment + whole.mean + mean.dose + workers_alive + duration + qro + (1|colony), data = drf.pollen.dry)
dry.g2 <- lmer(dry_weight~ treatment + whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.dry)
#this is the model we are keeping
dry.g3 <- lmer(dry_weight~ mean.dose + whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.dry)
anova(dry.int, dry.g1, dry.g2) #AIC is lower for dry.g2 and Chisq value is not sig.
## Data: drf.pollen.dry
## Models:
## dry.g2: dry_weight ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## dry.g1: dry_weight ~ treatment + whole.mean + mean.dose + workers_alive + duration + qro + (1 | colony)
## dry.int: dry_weight ~ treatment * whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## dry.g2 13 -2815.0 -2762.7 1420.5 -2841.0
## dry.g1 14 -2813.1 -2756.7 1420.5 -2841.1 0.1073 1 0.7433
## dry.int 17 -2809.0 -2740.6 1421.5 -2843.0 1.9746 3 0.5777
anova(dry.g1, dry.g3) #mean.dose is not useful
## Data: drf.pollen.dry
## Models:
## dry.g3: dry_weight ~ mean.dose + whole.mean + workers_alive + duration + qro + (1 | colony)
## dry.g1: dry_weight ~ treatment + whole.mean + mean.dose + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## dry.g3 10 -2797.0 -2756.7 1408.5 -2817.0
## dry.g1 14 -2813.1 -2756.7 1420.5 -2841.1 24.091 4 7.657e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(dry.g2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: dry_weight ~ treatment + whole.mean + workers_alive + duration +
## qro + (1 | colony)
## Data: drf.pollen.dry
##
## REML criterion at convergence: -2704.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.98339 -0.63521 -0.04443 0.67352 3.10627
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 2.555e-06 0.001598
## Residual 6.036e-05 0.007769
## Number of obs: 413, groups: colony, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.815e-02 4.270e-03 11.275
## treatment2 -2.710e-03 1.683e-03 -1.610
## treatment3 -7.079e-03 1.672e-03 -4.233
## treatment4 -6.735e-04 1.610e-03 -0.418
## treatment5 -1.115e-04 1.673e-03 -0.067
## whole.mean 3.993e-03 3.817e-03 1.046
## workers_alive -8.929e-04 6.538e-04 -1.366
## duration -1.095e-04 7.134e-05 -1.535
## qroB3 4.339e-04 1.723e-03 0.252
## qroB4 3.319e-03 1.782e-03 1.862
## qroB5 1.601e-03 1.467e-03 1.091
##
## Correlation of Fixed Effects:
## (Intr) trtmn2 trtmn3 trtmn4 trtmn5 whl.mn wrkrs_ duratn qroB3
## treatment2 0.058
## treatment3 0.094 0.487
## treatment4 0.167 0.520 0.534
## treatment5 0.121 0.565 0.499 0.548
## whole.mean -0.217 0.131 -0.083 -0.147 0.177
## workers_alv -0.709 -0.103 -0.183 -0.278 -0.316 -0.106
## duration -0.555 -0.390 -0.168 -0.169 -0.291 -0.278 0.129
## qroB3 0.103 0.197 0.062 0.092 0.251 0.049 -0.100 -0.252
## qroB4 -0.016 0.065 0.048 0.219 -0.025 -0.487 0.220 -0.011 0.166
## qroB5 -0.434 0.117 -0.042 -0.097 -0.046 0.035 0.583 -0.131 0.200
## qroB4
## treatment2
## treatment3
## treatment4
## treatment5
## whole.mean
## workers_alv
## duration
## qroB3
## qroB4
## qroB5 0.308
plot(resid(dry.g2)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(dry.g2));qqline(resid(dry.g2))
Anova(dry.g2)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: dry_weight
## Chisq Df Pr(>Chisq)
## treatment 26.1659 4 2.93e-05 ***
## whole.mean 1.0944 1 0.2955
## workers_alive 1.8656 1 0.1720
## duration 2.3552 1 0.1249
## qro 3.7859 3 0.2855
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dry.emm <- emmeans(dry.g2, "treatment")
pairs(dry.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.002710 0.00170 24.6 1.597 0.5131
## treatment1 - treatment3 0.007079 0.00169 22.8 4.186 0.0030
## treatment1 - treatment4 0.000674 0.00162 21.9 0.416 0.9933
## treatment1 - treatment5 0.000111 0.00168 22.1 0.066 1.0000
## treatment2 - treatment3 0.004369 0.00171 28.6 2.548 0.1080
## treatment2 - treatment4 -0.002036 0.00163 23.9 -1.250 0.7227
## treatment2 - treatment5 -0.002598 0.00157 23.4 -1.650 0.4820
## treatment3 - treatment4 -0.006405 0.00160 20.9 -4.000 0.0053
## treatment3 - treatment5 -0.006967 0.00169 24.5 -4.124 0.0031
## treatment4 - treatment5 -0.000562 0.00157 20.0 -0.357 0.9962
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(dry.emm, comparisons = TRUE)
dry.sum <- drf.pollen %>%
group_by(treatment) %>%
summarise(dry.m = mean(dry_weight),
dry.sd = sd(dry_weight),
dryn = length(dry_weight)) %>%
mutate(sedry = dry.sd/sqrt(dryn))
dry.sum
## # A tibble: 5 × 5
## treatment dry.m dry.sd dryn sedry
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 0.0444 0.00816 82 0.000901
## 2 2 0.0395 0.00834 76 0.000957
## 3 3 0.0370 0.0104 66 0.00129
## 4 4 0.0416 0.00831 107 0.000804
## 5 5 0.0416 0.00731 90 0.000771
dryanova_summary <- summary(dry.g2)
drytuk.means <- emmeans(object = dry.g2,
specs = "treatment",
adjust = "Tukey",
type = "response")
drytuk.means
## treatment emmean SE df lower.CL upper.CL
## 1 0.0432 0.00115 21.1 0.0400 0.0465
## 2 0.0405 0.00123 26.4 0.0371 0.0439
## 3 0.0361 0.00124 26.0 0.0327 0.0396
## 4 0.0425 0.00113 18.1 0.0393 0.0458
## 5 0.0431 0.00120 20.0 0.0397 0.0465
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
dry.cld.model <- cld(object = drytuk.means,
adjust = "Tukey",
Letters = letters,
alpha = 0.05)
dry.cld.model
## treatment emmean SE df lower.CL upper.CL .group
## 3 0.0361 0.00124 26.0 0.0327 0.0396 a
## 2 0.0405 0.00123 26.4 0.0371 0.0439 ab
## 4 0.0425 0.00113 18.1 0.0393 0.0458 b
## 5 0.0431 0.00120 20.0 0.0397 0.0465 b
## 1 0.0432 0.00115 21.1 0.0400 0.0465 b
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## P value adjustment: tukey method for comparing a family of 5 estimates
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
drytuk.treatment <- as.data.frame(dry.cld.model)
drytuk.treatment
## treatment emmean SE df lower.CL upper.CL .group
## 3 3 0.03613653 0.001235386 26.02933 0.03271473 0.03955833 a
## 2 2 0.04050539 0.001233812 26.42931 0.03709196 0.04391881 ab
## 4 4 0.04254161 0.001131634 18.08483 0.03929674 0.04578647 b
## 5 5 0.04310369 0.001198875 20.01363 0.03970375 0.04650364 b
## 1 1 0.04321515 0.001153569 21.07314 0.03996056 0.04646974 b
dry_max <- drf.pollen %>%
group_by(treatment) %>%
summarize(maxdry = max(mean(dry_weight)))
dry_for_plotting <- full_join(drytuk.treatment, dry_max,
by="treatment")
dp <- ggplot(data = dry.sum, aes(x = treatment, y = dry.m, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(0.03, 0.046)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = dry.m - sedry,
ymax = dry.m + sedry),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Drone Dry Weight (g)",) +
ggtitle("Average Drone Dry Weight (g) per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 30)
dp <- dp +
annotate(geom = "text",
x = 2.5, y = 0.045,
label = "P < 0.001",
size = 8) +
annotate(geom = "text",
x = c(3, 2, 4, 5, 1),
y = dry_for_plotting$maxdry + 0.002,
label = c("a", "ab", "b", "b", "b"),
size = 8) +
theme(legend.position = "none")
dp
drf.pollen.rf <- subset(drf.pollen, select = c(relative_fat, treatment, whole.mean, workers_alive, alive, duration, qro, colony))
drf.pollen.rf <- na.omit(drf.pollen.rf)
shapiro.test(drf.pollen.rf$relative_fat)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen.rf$relative_fat
## W = 0.8011, p-value < 2.2e-16
range(drf.pollen.rf$relative_fat)
## [1] 0.0002 0.0072
ggplot(drf.pollen.rf, aes(x=relative_fat, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.0002,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Relative Fat") +
labs(y = "Count", x = "Relative Fat(g) ")
drf.pollen.rf$logrf <- log(drf.pollen.rf$relative_fat)
shapiro.test(drf.pollen.rf$logrf)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen.rf$logrf
## W = 0.93266, p-value = 4.097e-12
range(drf.pollen.rf$logrf)
## [1] -8.517193 -4.933674
ggplot(drf.pollen.rf, aes(x=logrf, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.1,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Relative Fat (log transformed)") +
labs(y = "Count", x = "log(Relative Fat(g)) ")
rf.model <- lm(relative_fat~ treatment + whole.mean + workers_alive + duration + qro, data = drf.pollen.rf)
vif(rf.model)
## GVIF Df GVIF^(1/(2*Df))
## treatment 1.964734 4 1.088085
## whole.mean 1.769399 1 1.330188
## workers_alive 2.024742 1 1.422934
## duration 1.394242 1 1.180780
## qro 3.217236 3 1.215012
rf.int <- lmer(relative_fat~ treatment*whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.rf)
#this is the model we will keep
rf.g1 <- lmer(relative_fat~ treatment + whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.rf)
rf.int.2 <- lmer(logrf~ treatment*whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.rf)
rf.g2 <- lmer(logrf~ treatment + whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.rf)
anova(rf.int, rf.int.2, rf.g1, rf.g2)
## Data: drf.pollen.rf
## Models:
## rf.g1: relative_fat ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## rf.g2: logrf ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## rf.int: relative_fat ~ treatment * whole.mean + workers_alive + duration + qro + (1 | colony)
## rf.int.2: logrf ~ treatment * whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rf.g1 13 -4326.8 -4275.5 2176.42 -4352.8
## rf.g2 13 452.6 503.9 -213.30 426.6 0.0 0
## rf.int 17 -4327.9 -4260.8 2180.94 -4361.9 4788.5 4 < 2.2e-16 ***
## rf.int.2 17 456.8 523.9 -211.42 422.8 0.0 0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(rf.g1, rf.g2)
## Data: drf.pollen.rf
## Models:
## rf.g1: relative_fat ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## rf.g2: logrf ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rf.g1 13 -4326.8 -4275.5 2176.4 -4352.8
## rf.g2 13 452.6 503.9 -213.3 426.6 0 0
anova(rf.int, rf.g1)
## Data: drf.pollen.rf
## Models:
## rf.g1: relative_fat ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## rf.int: relative_fat ~ treatment * whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rf.g1 13 -4326.8 -4275.5 2176.4 -4352.8
## rf.int 17 -4327.9 -4260.8 2180.9 -4361.9 9.0484 4 0.0599 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(rf.int)
## Linear mixed model fit by REML ['lmerMod']
## Formula: relative_fat ~ treatment * whole.mean + workers_alive + duration +
## qro + (1 | colony)
## Data: drf.pollen.rf
##
## REML criterion at convergence: -4126.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2006 -0.5388 -0.1123 0.3446 6.5413
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 7.213e-09 8.493e-05
## Residual 6.657e-07 8.159e-04
## Number of obs: 382, groups: colony, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.057e-03 6.226e-04 3.304
## treatment2 6.837e-04 6.432e-04 1.063
## treatment3 -1.542e-04 5.419e-04 -0.285
## treatment4 -1.152e-04 5.984e-04 -0.192
## treatment5 -5.468e-04 5.976e-04 -0.915
## whole.mean 7.931e-04 7.551e-04 1.050
## workers_alive -3.239e-05 6.920e-05 -0.468
## duration -1.225e-05 7.054e-06 -1.736
## qroB3 -1.956e-04 1.725e-04 -1.134
## qroB4 3.445e-04 1.760e-04 1.957
## qroB5 5.173e-05 1.585e-04 0.326
## treatment2:whole.mean -1.554e-03 1.132e-03 -1.373
## treatment3:whole.mean -6.187e-04 8.862e-04 -0.698
## treatment4:whole.mean -9.868e-05 1.040e-03 -0.095
## treatment5:whole.mean 1.569e-03 1.082e-03 1.449
plot(resid(rf.int)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(rf.int));qqline(resid(rf.int)) #doesn't look great
plot(rf.int)
plot(drf.pollen.rf$treatment, drf.pollen.rf$relative_fat)
plot(drf.pollen.rf$relative_fat)
rf_sub <- subset(drf.pollen.rf, relative_fat<0.004) #Difference of 13 bees
ggplot(rf_sub, aes(x=relative_fat, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.0002,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Relative Fat") +
labs(y = "Count", x = "Relative Fat(g) ")
shapiro.test(rf_sub$relative_fat)
##
## Shapiro-Wilk normality test
##
## data: rf_sub$relative_fat
## W = 0.97686, p-value = 1.232e-05
rf.int.sub <- lmer(relative_fat~ treatment*whole.mean + workers_alive + duration + qro + (1|colony), data = rf_sub)
rf.g1.sub <- lmer(relative_fat~ treatment + whole.mean + workers_alive + duration + qro + (1|colony), data = rf_sub)
anova(rf.int.sub, rf.g1.sub) #now the model without the interaction is better.
## Data: rf_sub
## Models:
## rf.g1.sub: relative_fat ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## rf.int.sub: relative_fat ~ treatment * whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rf.g1.sub 13 -4462.6 -4411.7 2244.3 -4488.6
## rf.int.sub 17 -4456.0 -4389.6 2245.0 -4490.0 1.4676 4 0.8324
plot(resid(rf.int.sub)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(rf.int.sub));qqline(resid(rf.int.sub)) #look's much better
plot(resid(rf.g1.sub)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(rf.g1.sub));qqline(resid(rf.g1.sub))
Anova(rf.g1.sub)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: relative_fat
## Chisq Df Pr(>Chisq)
## treatment 13.7929 4 0.007986 **
## whole.mean 3.1754 1 0.074754 .
## workers_alive 0.0000 1 0.997282
## duration 5.9490 1 0.014726 *
## qro 1.9773 3 0.577132
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1(rf.g1.sub, test = "Chisq") #get rid of duration
## Single term deletions
##
## Model:
## relative_fat ~ treatment + whole.mean + workers_alive + duration +
## qro + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> -4462.6
## treatment 4 -4454.3 16.2968 0.002646 **
## whole.mean 1 -4460.8 3.7924 0.051487 .
## workers_alive 1 -4464.6 0.0171 0.895963
## duration 1 -4456.7 7.8442 0.005098 **
## qro 3 -4465.7 2.8542 0.414653
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rf.g1.sub.1 <- lmer(relative_fat~ treatment + whole.mean + workers_alive + qro + (1|colony), data = rf_sub)
qqnorm(resid(rf.g1.sub.1));qqline(resid(rf.g1.sub.1))
anova(rf.g1.sub, rf.g1.sub.1) #keep duration
## Data: rf_sub
## Models:
## rf.g1.sub.1: relative_fat ~ treatment + whole.mean + workers_alive + qro + (1 | colony)
## rf.g1.sub: relative_fat ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rf.g1.sub.1 12 -4456.7 -4409.8 2240.4 -4480.7
## rf.g1.sub 13 -4462.6 -4411.7 2244.3 -4488.6 7.8442 1 0.005098 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
I am going to keep the subset data without relative fat values greater than 0.004, the model without the interaction effect, with all originally included variables.
Anova(rf.g1.sub)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: relative_fat
## Chisq Df Pr(>Chisq)
## treatment 13.7929 4 0.007986 **
## whole.mean 3.1754 1 0.074754 .
## workers_alive 0.0000 1 0.997282
## duration 5.9490 1 0.014726 *
## qro 1.9773 3 0.577132
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rf.emm <- emmeans(rf.g1.sub, "treatment")
pairs(rf.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 -8.10e-06 0.000138 24.9 -0.059 1.0000
## treatment1 - treatment3 3.51e-04 0.000139 25.2 2.527 0.1162
## treatment1 - treatment4 1.37e-04 0.000136 23.2 1.010 0.8482
## treatment1 - treatment5 -1.21e-04 0.000140 23.6 -0.863 0.9076
## treatment2 - treatment3 3.59e-04 0.000137 28.5 2.613 0.0945
## treatment2 - treatment4 1.46e-04 0.000134 23.5 1.084 0.8131
## treatment2 - treatment5 -1.13e-04 0.000128 23.2 -0.879 0.9018
## treatment3 - treatment4 -2.14e-04 0.000133 23.4 -1.600 0.5118
## treatment3 - treatment5 -4.72e-04 0.000137 26.5 -3.435 0.0154
## treatment4 - treatment5 -2.58e-04 0.000133 21.5 -1.948 0.3237
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(rf.emm, comparisons = TRUE)
rf.sum <- rf_sub %>%
group_by(treatment) %>%
summarise(mrf = mean(relative_fat),
sdrf = sd(relative_fat),
nrf = length(relative_fat)) %>%
mutate(serf = sdrf/sqrt(nrf))
rf.sum
## # A tibble: 5 × 5
## treatment mrf sdrf nrf serf
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 0.00181 0.000663 70 0.0000792
## 2 2 0.00168 0.000559 75 0.0000645
## 3 3 0.00143 0.000582 59 0.0000758
## 4 4 0.00161 0.000565 88 0.0000603
## 5 5 0.00184 0.000490 77 0.0000558
rfanova_summary <- summary(rf.g1.sub)
rftuk.means <- emmeans(object = rf.g1.sub,
specs = "treatment",
adjust = "Tukey",
type = "response")
rftuk.means
## treatment emmean SE df lower.CL upper.CL
## 1 0.00173 9.65e-05 22.8 0.00146 0.00200
## 2 0.00174 9.78e-05 25.0 0.00146 0.00201
## 3 0.00138 1.00e-04 27.9 0.00110 0.00165
## 4 0.00159 9.82e-05 20.8 0.00131 0.00187
## 5 0.00185 9.96e-05 22.1 0.00157 0.00213
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
rf.cld.model <- cld(object = rftuk.means,
adjust = "Tukey",
Letters = letters,
alpha = 0.05)
rf.cld.model
## treatment emmean SE df lower.CL upper.CL .group
## 3 0.00138 1.00e-04 27.9 0.00110 0.00165 a
## 4 0.00159 9.82e-05 20.8 0.00131 0.00187 ab
## 1 0.00173 9.65e-05 22.8 0.00146 0.00200 ab
## 2 0.00174 9.78e-05 25.0 0.00146 0.00201 ab
## 5 0.00185 9.96e-05 22.1 0.00157 0.00213 b
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## P value adjustment: tukey method for comparing a family of 5 estimates
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
rftuk.treatment <- as.data.frame(rf.cld.model)
rftuk.treatment
## treatment emmean SE df lower.CL upper.CL .group
## 3 3 0.001376931 1.004608e-04 27.95474 0.001100155 0.001653706 a
## 4 4 0.001590453 9.819062e-05 20.83805 0.001313121 0.001867785 ab
## 1 1 0.001727944 9.650528e-05 22.77358 0.001457640 0.001998248 ab
## 2 2 0.001736044 9.784528e-05 24.96593 0.001464130 0.002007958 ab
## 5 5 0.001848567 9.961980e-05 22.15407 0.001568838 0.002128296 b
rf_max <- rf_sub %>%
group_by(treatment) %>%
summarize(maxrf = max(mean(relative_fat)))
rf_for_plotting <- full_join(rftuk.treatment, rf_max,
by="treatment")
rf <- ggplot(data = rf.sum, aes(x = treatment, y = mrf, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(0.0012, 0.002)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = mrf - serf,
ymax = mrf + serf),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Drone Relative Fat (g)",) +
ggtitle("Average Drone Relative Fat (g) per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 30)
rf <- rf +
annotate(geom = "text",
x = 1, y = 0.00199,
label = "P < 0.01",
size = 8) +
annotate(geom = "text",
x = c(3, 4, 1, 2, 5),
y = rf_for_plotting$maxrf + 0.0001,
label = c("a", "ab", "ab", "ab", "b"),
size = 8) +
theme(legend.position = "none")
rf
rc.drones <- subset(drf.pollen, select = c(radial, treatment, whole.mean, workers_alive, alive, duration, qro, colony))
rc.drones <- na.omit(rc.drones)
ggplot(rc.drones, aes(x=radial, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.05,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Radial Cell Length") +
labs(y = "Count", x = "Length (mm) ")
shapiro.test(rc.drones$radial)
##
## Shapiro-Wilk normality test
##
## data: rc.drones$radial
## W = 0.98453, p-value = 0.000244
range(rc.drones$radial)
## [1] 1.6924 3.1542
rc.drones$radsquare <- (rc.drones$radial)^2
shapiro.test(rc.drones$radsquare)
##
## Shapiro-Wilk normality test
##
## data: rc.drones$radsquare
## W = 0.99253, p-value = 0.0397
ggplot(rc.drones, aes(x=radsquare, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.5,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Radial Cell Length ^2 ") +
labs(y = "Count", x = "Length ^2 (mm) ")
rad.model <- lm(radial~ treatment + whole.mean + workers_alive + duration + qro, data = rc.drones)
vif(rad.model)
## GVIF Df GVIF^(1/(2*Df))
## treatment 2.020243 4 1.091881
## whole.mean 1.737422 1 1.318113
## workers_alive 1.966186 1 1.402208
## duration 1.418566 1 1.191036
## qro 2.964081 3 1.198528
#No colinearity
rad.int <- lmer(radsquare~ treatment*whole.mean + workers_alive + duration + qro + (1|colony), data = rc.drones)
rad.g1 <- lmer(radsquare~ treatment + whole.mean + workers_alive + duration + qro + (1|colony), data = rc.drones) #this is the model we are keeping
rad.g2 <- lmer(radsquare~ treatment + whole.mean + workers_alive + duration + qro + (1|colony), data = rc.drones)
rad.g3 <- lmer(radsquare~ whole.mean + workers_alive + duration + qro + (1|colony), data = rc.drones)
anova(rad.int, rad.g1, rad.g2, rad.g3)
## Data: rc.drones
## Models:
## rad.g3: radsquare ~ whole.mean + workers_alive + duration + qro + (1 | colony)
## rad.g1: radsquare ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## rad.g2: radsquare ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## rad.int: radsquare ~ treatment * whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rad.g3 9 1139.3 1175.3 -560.64 1121.3
## rad.g1 13 1137.5 1189.7 -555.77 1111.5 9.7236 4 0.04535 *
## rad.g2 13 1137.5 1189.7 -555.77 1111.5 0.0000 0
## rad.int 17 1143.4 1211.5 -554.70 1109.4 2.1546 4 0.70734
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqnorm(resid(rad.g1));qqline(resid(rad.g1))
plot(resid(rad.g1)) +
abline(h=0, col="red", lwd=2)
## integer(0)
#yay! :)
summary(rad.g1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: radsquare ~ treatment + whole.mean + workers_alive + duration +
## qro + (1 | colony)
## Data: rc.drones
##
## REML criterion at convergence: 1138
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3146 -0.5967 0.0096 0.6484 4.1420
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 0.09018 0.3003
## Residual 0.87185 0.9337
## Number of obs: 407, groups: colony, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.64868 0.63542 10.463
## treatment2 -0.02433 0.24342 -0.100
## treatment3 -0.49655 0.24302 -2.043
## treatment4 -0.01964 0.23581 -0.083
## treatment5 0.02175 0.24500 0.089
## whole.mean 0.23458 0.55270 0.424
## workers_alive -0.02921 0.09767 -0.299
## duration -0.01463 0.01061 -1.379
## qroB3 0.01197 0.24599 0.049
## qroB4 0.17116 0.26564 0.644
## qroB5 0.37547 0.21685 1.731
##
## Correlation of Fixed Effects:
## (Intr) trtmn2 trtmn3 trtmn4 trtmn5 whl.mn wrkrs_ duratn qroB3
## treatment2 0.042
## treatment3 0.092 0.512
## treatment4 0.125 0.525 0.535
## treatment5 0.113 0.573 0.517 0.543
## whole.mean -0.201 0.143 -0.048 -0.128 0.179
## workers_alv -0.719 -0.095 -0.180 -0.240 -0.299 -0.088
## duration -0.547 -0.376 -0.191 -0.153 -0.285 -0.297 0.110
## qroB3 0.113 0.178 0.062 0.097 0.244 0.033 -0.101 -0.246
## qroB4 -0.039 0.041 0.025 0.197 -0.044 -0.489 0.230 0.014 0.166
## qroB5 -0.452 0.117 -0.054 -0.081 -0.054 0.052 0.590 -0.122 0.188
## qroB4
## treatment2
## treatment3
## treatment4
## treatment5
## whole.mean
## workers_alv
## duration
## qroB3
## qroB4
## qroB5 0.296
Anova(rad.g1)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: radsquare
## Chisq Df Pr(>Chisq)
## treatment 6.7241 4 0.1512
## whole.mean 0.1801 1 0.6712
## workers_alive 0.0894 1 0.7649
## duration 1.9025 1 0.1678
## qro 3.1070 3 0.3754
rad.emm <- emmeans(rad.g1, "treatment")
pairs(rad.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.02433 0.245 25.5 0.099 1.0000
## treatment1 - treatment3 0.49655 0.245 25.8 2.027 0.2817
## treatment1 - treatment4 0.01964 0.237 23.2 0.083 1.0000
## treatment1 - treatment5 -0.02175 0.246 23.4 -0.088 1.0000
## treatment2 - treatment3 0.47222 0.242 29.5 1.951 0.3142
## treatment2 - treatment4 -0.00469 0.235 25.1 -0.020 1.0000
## treatment2 - treatment5 -0.04608 0.227 23.7 -0.203 0.9996
## treatment3 - treatment4 -0.47691 0.233 23.9 -2.048 0.2747
## treatment3 - treatment5 -0.51830 0.241 26.8 -2.146 0.2308
## treatment4 - treatment5 -0.04139 0.231 22.0 -0.179 0.9998
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
rad.sum <- rc.drones %>%
group_by(treatment) %>%
summarise(rad.m = mean(radial),
rad.sd = sd(radial),
rad.n = length(radial)) %>%
mutate(serad = rad.sd/sqrt(rad.n))
rad.sum
## # A tibble: 5 × 5
## treatment rad.m rad.sd rad.n serad
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 2.51 0.161 79 0.0182
## 2 2 2.45 0.183 76 0.0209
## 3 3 2.36 0.243 62 0.0309
## 4 4 2.47 0.231 104 0.0227
## 5 5 2.48 0.165 86 0.0178
radanova_summary <- summary(rad.g1)
radtuk.means <- emmeans(object = rad.g1,
specs = "treatment",
adjust = "Tukey",
type = "response")
radtuk.means
## treatment emmean SE df lower.CL upper.CL
## 1 6.17 0.170 22.6 5.69 6.65
## 2 6.15 0.174 26.1 5.66 6.63
## 3 5.67 0.175 28.4 5.19 6.16
## 4 6.15 0.168 20.4 5.68 6.63
## 5 6.19 0.175 21.3 5.70 6.68
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
rad.cld.model <- cld(object = radtuk.means,
adjust = "Tukey",
Letters = letters,
alpha = 0.05)
rad.cld.model
## treatment emmean SE df lower.CL upper.CL .group
## 3 5.67 0.175 28.4 5.19 6.16 a
## 2 6.15 0.174 26.1 5.66 6.63 a
## 4 6.15 0.168 20.4 5.68 6.63 a
## 1 6.17 0.170 22.6 5.69 6.65 a
## 5 6.19 0.175 21.3 5.70 6.68 a
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## P value adjustment: tukey method for comparing a family of 5 estimates
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
radtuk.treatment <- as.data.frame(rad.cld.model)
rad_max <- rc.drones %>%
group_by(treatment) %>%
summarize(maxrad = max(mean(radial)))
rad_for_plotting <- full_join(radtuk.treatment, rad_max,
by="treatment")
radp <- ggplot(data = rad.sum, aes(x = treatment, y = rad.m, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(2,2.55)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = rad.m - serad,
ymax = rad.m + serad),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Drone Radial Cell Length",) +
ggtitle("Average Drone Radial Cell Length (mm) per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 30) +
theme(legend.position = "none")
radp <- radp +
annotate(geom = "text",
x = 2, y = 2.54,
label = "P > 0.1",
size = 12) +
annotate(geom = "text",
x = c(3, 2, 4, 1, 5),
y = rad_for_plotting$maxrad + 0.05,
label = c("a", "a", "a", "a", "a"),
size = 12) +
theme(legend.position = "none")
radp
trunc.csv <- read_csv("duration.fortrunc.csv",
col_types = cols(round = col_factor(levels = c("1",
"2")), treatment = col_factor(levels = c("1",
"2", "3", "4", "5")), replicate = col_factor(levels = c("1",
"2", "3", "4", "5", "7", "9", "11",
"12")), start_date = col_date(format = "%m/%d/%Y"),
emerge_date = col_date(format = "%m/%d/%Y"),
end_date = col_date(format = "%m/%d/%Y"))) # this data set is for drone counts
trunc <- merge(wd.summary, trunc.csv, by.x = "colony")
trunc <- merge(mortality, trunc, by.x = "colony")
trunc$qro <- as.factor(trunc$origin)
count.drones <- subset(trunc, select = c(count, treatment, whole.mean, alive, duration, qro, colony))
count.drones <- na.omit(count.drones)
ggplot(count.drones, aes(x=count, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 1 ,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Count") +
labs(y = "Count", x = "Number of Drones")
count.model <- lm(count~ treatment + whole.mean + alive + duration + qro, data = count.drones)
vif(count.model)
## GVIF Df GVIF^(1/(2*Df))
## treatment 1.443153 4 1.046921
## whole.mean 2.049137 1 1.431481
## alive 2.511104 1 1.584646
## duration 2.084378 1 1.443738
## qro 1.897492 3 1.112662
count.pois <- glm(count ~ treatment + whole.mean +alive +duration + qro, data = count.drones, family = "poisson")
summary(count.pois) #overdispersed
##
## Call:
## glm(formula = count ~ treatment + whole.mean + alive + duration +
## qro, family = "poisson", data = count.drones)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8240 -1.5124 -0.6275 0.9664 2.7418
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.15115 0.64327 -1.790 0.073528 .
## treatment2 -0.06390 0.15584 -0.410 0.681776
## treatment3 -0.45881 0.16131 -2.844 0.004452 **
## treatment4 -0.02662 0.16024 -0.166 0.868060
## treatment5 0.13738 0.16634 0.826 0.408849
## whole.mean 3.04430 0.40744 7.472 7.91e-14 ***
## alive 0.15824 0.06218 2.545 0.010933 *
## duration 0.02629 0.01308 2.010 0.044444 *
## qroB3 0.25413 0.15904 1.598 0.110063
## qroB4 -0.06042 0.18029 -0.335 0.737534
## qroB5 0.46523 0.13301 3.498 0.000469 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.44 on 44 degrees of freedom
## Residual deviance: 107.43 on 34 degrees of freedom
## AIC: 291.43
##
## Number of Fisher Scoring iterations: 5
count.int <- glm.nb(count~ treatment*whole.mean + alive + duration + qro, data = count.drones)
count.g1 <- glm.nb(count~ treatment + whole.mean + alive + duration + qro , data = count.drones) #this is the model we are keeping
count.g2 <- glm.nb(count~ treatment + alive + duration + qro, data = count.drones)
count.g3 <- glm.nb(count~ whole.mean + alive + duration + qro, data = count.drones)
anova(count.int, count.g1)
## Likelihood ratio tests of Negative Binomial Models
##
## Response: count
## Model theta Resid. df
## 1 treatment + whole.mean + alive + duration + qro 6.877672 34
## 2 treatment * whole.mean + alive + duration + qro 7.979431 30
## 2 x log-lik. Test df LR stat. Pr(Chi)
## 1 -251.3451
## 2 -247.4647 1 vs 2 4 3.880433 0.4224293
anova(count.g1, count.g2)
## Likelihood ratio tests of Negative Binomial Models
##
## Response: count
## Model theta Resid. df
## 1 treatment + alive + duration + qro 2.882863 35
## 2 treatment + whole.mean + alive + duration + qro 6.877672 34
## 2 x log-lik. Test df LR stat. Pr(Chi)
## 1 -275.2316
## 2 -251.3451 1 vs 2 1 23.8865 1.021861e-06
anova(count.g1, count.g3)
## Likelihood ratio tests of Negative Binomial Models
##
## Response: count
## Model theta Resid. df
## 1 whole.mean + alive + duration + qro 5.201726 38
## 2 treatment + whole.mean + alive + duration + qro 6.877672 34
## 2 x log-lik. Test df LR stat. Pr(Chi)
## 1 -258.0562
## 2 -251.3451 1 vs 2 4 6.711111 0.1519653
AIC(count.g1, count.g3)
## df AIC
## count.g1 12 275.3451
## count.g3 8 274.0562
AIC(count.g1, count.int)
## df AIC
## count.g1 12 275.3451
## count.int 16 279.4647
qqnorm(resid(count.g1));qqline(resid(count.g1))
plot(resid(count.g1)) +
abline(h=0, col="red", lwd=2)
## integer(0)
#these are actually not bad
summary(count.g1)
##
## Call:
## glm.nb(formula = count ~ treatment + whole.mean + alive + duration +
## qro, data = count.drones, init.theta = 6.877671906, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0993 -1.1173 -0.3082 0.5519 1.9329
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.807665 1.000096 -1.807 0.070686 .
## treatment2 -0.125314 0.254829 -0.492 0.622892
## treatment3 -0.525825 0.257952 -2.038 0.041503 *
## treatment4 0.003136 0.267312 0.012 0.990640
## treatment5 0.097910 0.271356 0.361 0.718235
## whole.mean 3.547313 0.648033 5.474 4.4e-08 ***
## alive 0.278831 0.096008 2.904 0.003681 **
## duration 0.022130 0.020142 1.099 0.271898
## qroB3 0.238797 0.267096 0.894 0.371295
## qroB4 -0.079652 0.309850 -0.257 0.797129
## qroB5 0.736118 0.220697 3.335 0.000852 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(6.8777) family taken to be 1)
##
## Null deviance: 136.890 on 44 degrees of freedom
## Residual deviance: 52.237 on 34 degrees of freedom
## AIC: 275.35
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 6.88
## Std. Err.: 2.65
## Warning while fitting theta: alternation limit reached
##
## 2 x log-likelihood: -251.345
Anova(count.g1)
## Analysis of Deviance Table (Type II tests)
##
## Response: count
## LR Chisq Df Pr(>Chisq)
## treatment 7.2135 4 0.125026
## whole.mean 30.6403 1 3.106e-08 ***
## alive 8.3586 1 0.003839 **
## duration 1.0701 1 0.300918
## qro 13.0619 3 0.004505 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
em.count <- emmeans(count.g1, "treatment")
pairs(em.count)
## contrast estimate SE df z.ratio p.value
## treatment1 - treatment2 0.12531 0.255 Inf 0.492 0.9882
## treatment1 - treatment3 0.52583 0.258 Inf 2.038 0.2476
## treatment1 - treatment4 -0.00314 0.267 Inf -0.012 1.0000
## treatment1 - treatment5 -0.09791 0.271 Inf -0.361 0.9964
## treatment2 - treatment3 0.40051 0.252 Inf 1.589 0.5046
## treatment2 - treatment4 -0.12845 0.252 Inf -0.510 0.9864
## treatment2 - treatment5 -0.22322 0.249 Inf -0.895 0.8988
## treatment3 - treatment4 -0.52896 0.261 Inf -2.028 0.2525
## treatment3 - treatment5 -0.62374 0.263 Inf -2.373 0.1227
## treatment4 - treatment5 -0.09477 0.261 Inf -0.362 0.9963
##
## Results are averaged over the levels of: qro
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(em.count, comparisons = TRUE)
count.sum <- count.drones %>%
group_by(treatment) %>%
summarise(count.m = mean(count),
count.sd = sd(count),
count.n = length(count)) %>%
mutate(secount = count.sd/sqrt(count.n))
count.sum
## # A tibble: 5 × 5
## treatment count.m count.sd count.n secount
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 9.67 7.47 9 2.49
## 2 2 9.78 6.67 9 2.22
## 3 3 8.67 6.76 9 2.25
## 4 4 12.2 9.13 9 3.04
## 5 5 10.9 6.92 9 2.31
countanova_summary <- summary(count.g1)
counttuk.means <- emmeans(object = count.g1,
specs = "treatment",
adjust = "Tukey",
type = "response")
counttuk.means
## treatment response SE df asymp.LCL asymp.UCL
## 1 9.09 1.77 Inf 5.50 15.00
## 2 8.02 1.49 Inf 4.97 12.93
## 3 5.37 1.06 Inf 3.23 8.93
## 4 9.11 1.89 Inf 5.35 15.52
## 5 10.02 1.92 Inf 6.12 16.41
##
## Results are averaged over the levels of: qro
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## Intervals are back-transformed from the log scale
count.cld.model <- cld(object = counttuk.means,
adjust = "Tukey",
Letters = letters,
alpha = 0.05)
count.cld.model
## treatment response SE df asymp.LCL asymp.UCL .group
## 3 5.37 1.06 Inf 3.23 8.93 a
## 2 8.02 1.49 Inf 4.97 12.93 a
## 1 9.09 1.77 Inf 5.50 15.00 a
## 4 9.11 1.89 Inf 5.35 15.52 a
## 5 10.02 1.92 Inf 6.12 16.41 a
##
## Results are averaged over the levels of: qro
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## Intervals are back-transformed from the log scale
## P value adjustment: tukey method for comparing a family of 5 estimates
## Tests are performed on the log scale
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
counttuk.means <- as.data.frame(counttuk.means)
countuk.treatment <- as.data.frame(count.cld.model)
count_max <- count.drones %>%
group_by(treatment) %>%
summarize(maxcount = max(mean(count)))
count_for_plotting <- full_join(countuk.treatment, count_max,
by="treatment")
ggplot(data = count.sum, aes(x = treatment, y = count.m, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(1,17.5)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = count.m-secount,
ymax = count.m+secount),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Drone Count",) +
ggtitle("Average Drone Count per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 30) +
theme(legend.position = "none") +
annotate(geom = "text",
x = 1.5, y = 16,
label = "P > 0.05",
size = 15) +
annotate(geom = "text",
x = c(3, 2, 1, 4, 5),
y = count_for_plotting$maxcount + 3.5,
label = c("a", "a", "a", "a", "a"),
size = 12) +
theme(legend.position = "none")
duration <- subset(trunc, select = c(count, treatment, whole.mean, alive, duration, qro, colony))
ggplot(duration, aes(x=duration, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 1 ,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Colony Duration") +
labs(y = "Count", x = "Days")
descdist(duration$duration, discrete = TRUE)
## summary statistics
## ------
## min: 20 max: 58
## median: 42
## mean: 42.46667
## estimated sd: 6.542171
## estimated skewness: -0.2500756
## estimated kurtosis: 6.629389
duration$square.dur <- (duration$duration)^2
descdist(duration$square.dur, discrete = TRUE)
## summary statistics
## ------
## min: 400 max: 3364
## median: 1764
## mean: 1845.267
## estimated sd: 553.6501
## estimated skewness: 0.8579907
## estimated kurtosis: 5.719182
dur.model <- lm(duration~ treatment + whole.mean + alive + qro, data = duration)
vif(dur.model)
## GVIF Df GVIF^(1/(2*Df))
## treatment 1.144322 4 1.016994
## whole.mean 1.641378 1 1.281163
## alive 1.708799 1 1.307210
## qro 1.550084 3 1.075786
dur.pois <- glm(duration ~ treatment + whole.mean + alive + qro, data = duration, family = "poisson")
summary(count.pois) #overdispersed
##
## Call:
## glm(formula = count ~ treatment + whole.mean + alive + duration +
## qro, family = "poisson", data = count.drones)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8240 -1.5124 -0.6275 0.9664 2.7418
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.15115 0.64327 -1.790 0.073528 .
## treatment2 -0.06390 0.15584 -0.410 0.681776
## treatment3 -0.45881 0.16131 -2.844 0.004452 **
## treatment4 -0.02662 0.16024 -0.166 0.868060
## treatment5 0.13738 0.16634 0.826 0.408849
## whole.mean 3.04430 0.40744 7.472 7.91e-14 ***
## alive 0.15824 0.06218 2.545 0.010933 *
## duration 0.02629 0.01308 2.010 0.044444 *
## qroB3 0.25413 0.15904 1.598 0.110063
## qroB4 -0.06042 0.18029 -0.335 0.737534
## qroB5 0.46523 0.13301 3.498 0.000469 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.44 on 44 degrees of freedom
## Residual deviance: 107.43 on 34 degrees of freedom
## AIC: 291.43
##
## Number of Fisher Scoring iterations: 5
dur.int <- glm.nb(duration~ treatment*whole.mean + alive + qro, data = duration)
dur.g1 <- glm.nb(duration~ treatment + whole.mean + alive + qro, data = duration)
drop1(dur.g1, test = "Chisq")
## Single term deletions
##
## Model:
## duration ~ treatment + whole.mean + alive + qro
## Df Deviance AIC LRT Pr(>Chi)
## <none> 22.659 293.65
## treatment 4 28.258 291.25 5.5995 0.231122
## whole.mean 1 27.591 296.58 4.9320 0.026363 *
## alive 1 32.902 301.90 10.2434 0.001372 **
## qro 3 27.751 292.75 5.0917 0.165204
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dur.g2 <- glm.nb(duration~ treatment + alive + qro, data = duration)
dur.g3 <- glm.nb(duration~ whole.mean + alive + qro, data = duration)
# Switch to squared data below to get rid of warning messages
dur.pois <- glm(square.dur ~ treatment + whole.mean + alive + qro, data = duration, family = "poisson")
summary(count.pois) #overdispersed
##
## Call:
## glm(formula = count ~ treatment + whole.mean + alive + duration +
## qro, family = "poisson", data = count.drones)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8240 -1.5124 -0.6275 0.9664 2.7418
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.15115 0.64327 -1.790 0.073528 .
## treatment2 -0.06390 0.15584 -0.410 0.681776
## treatment3 -0.45881 0.16131 -2.844 0.004452 **
## treatment4 -0.02662 0.16024 -0.166 0.868060
## treatment5 0.13738 0.16634 0.826 0.408849
## whole.mean 3.04430 0.40744 7.472 7.91e-14 ***
## alive 0.15824 0.06218 2.545 0.010933 *
## duration 0.02629 0.01308 2.010 0.044444 *
## qroB3 0.25413 0.15904 1.598 0.110063
## qroB4 -0.06042 0.18029 -0.335 0.737534
## qroB5 0.46523 0.13301 3.498 0.000469 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.44 on 44 degrees of freedom
## Residual deviance: 107.43 on 34 degrees of freedom
## AIC: 291.43
##
## Number of Fisher Scoring iterations: 5
dur.int <- glm.nb(square.dur~ treatment*whole.mean + alive + qro, data = duration)
dur.g1 <- glm.nb(square.dur~ treatment + whole.mean + alive + qro, data = duration)
drop1(dur.g1, test = "F")
## Single term deletions
##
## Model:
## square.dur ~ treatment + whole.mean + alive + qro
## Df Deviance AIC F value Pr(>F)
## <none> 45.458 684.39
## treatment 4 58.601 689.54 2.5298 0.0578470 .
## whole.mean 1 55.290 692.23 7.5698 0.0093366 **
## alive 1 67.018 703.95 16.6000 0.0002517 ***
## qro 3 56.324 689.26 2.7888 0.0549046 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dur.g2 <- glm.nb(square.dur~ treatment + alive + qro, data = duration)
dur.g3 <- glm.nb(square.dur~ whole.mean + alive + qro, data = duration) # stick with this one?
dur.g4 <- glm.nb(square.dur ~ treatment, data = duration)
AIC(dur.pois, dur.g1)
## df AIC
## dur.pois 10 4199.0536
## dur.g1 11 686.3947
AIC(dur.g1, dur.g2, dur.g3, dur.g4)
## df AIC
## dur.g1 11 686.3947
## dur.g2 10 693.2944
## dur.g3 7 689.9570
## dur.g4 6 703.4817
anova(dur.g1, dur.g3, dur.g4)
## Likelihood ratio tests of Negative Binomial Models
##
## Response: square.dur
## Model theta Resid. df 2 x log-lik.
## 1 treatment 11.60760 40 -691.4817
## 2 whole.mean + alive + qro 16.31718 39 -675.9570
## 3 treatment + whole.mean + alive + qro 21.05744 35 -664.3947
## Test df LR stat. Pr(Chi)
## 1
## 2 1 vs 2 1 15.5247 8.143412e-05
## 3 2 vs 3 4 11.5623 2.092096e-02
qqnorm(resid(dur.g3));qqline(resid(dur.g3))
qqnorm(resid(dur.g1));qqline(resid(dur.g1)) # I almost feel like this one fits better
summary(dur.g1)
##
## Call:
## glm.nb(formula = square.dur ~ treatment + whole.mean + alive +
## qro, data = duration, init.theta = 21.05743915, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.89476 -0.75776 0.00199 0.60069 2.22508
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.48890 0.12691 59.012 < 2e-16 ***
## treatment2 -0.22563 0.10490 -2.151 0.03148 *
## treatment3 -0.14635 0.10716 -1.366 0.17200
## treatment4 -0.30842 0.10401 -2.965 0.00302 **
## treatment5 -0.33825 0.10672 -3.169 0.00153 **
## whole.mean -0.73649 0.23250 -3.168 0.00154 **
## alive 0.13810 0.02756 5.012 5.4e-07 ***
## qroB3 -0.28456 0.10963 -2.596 0.00944 **
## qroB4 -0.01014 0.12836 -0.079 0.93706
## qroB5 0.13285 0.08959 1.483 0.13813
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(21.0574) family taken to be 1)
##
## Null deviance: 91.425 on 44 degrees of freedom
## Residual deviance: 45.458 on 35 degrees of freedom
## AIC: 686.39
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 21.06
## Std. Err.: 4.47
##
## 2 x log-likelihood: -664.395
summary(dur.g3)
##
## Call:
## glm.nb(formula = square.dur ~ whole.mean + alive + qro, data = duration,
## init.theta = 16.31718118, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3506 -0.5620 -0.0943 0.4540 2.6766
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.38105 0.13375 55.184 < 2e-16 ***
## whole.mean -0.68794 0.25724 -2.674 0.007488 **
## alive 0.11240 0.02980 3.772 0.000162 ***
## qroB3 -0.26889 0.12412 -2.166 0.030287 *
## qroB4 -0.04101 0.14408 -0.285 0.775925
## qroB5 0.11116 0.10092 1.101 0.270697
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(16.3172) family taken to be 1)
##
## Null deviance: 71.078 on 44 degrees of freedom
## Residual deviance: 45.569 on 39 degrees of freedom
## AIC: 689.96
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 16.32
## Std. Err.: 3.45
##
## 2 x log-likelihood: -675.957
Anova(dur.g1)
## Analysis of Deviance Table (Type II tests)
##
## Response: square.dur
## LR Chisq Df Pr(>Chisq)
## treatment 13.1426 4 0.010600 *
## whole.mean 9.8316 1 0.001715 **
## alive 21.5601 1 3.429e-06 ***
## qro 10.8661 3 0.012472 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(dur.g3)
## Analysis of Deviance Table (Type II tests)
##
## Response: square.dur
## LR Chisq Df Pr(>Chisq)
## whole.mean 7.2474 1 0.0071004 **
## alive 13.2906 1 0.0002667 ***
## qro 7.1824 3 0.0663058 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# I am going to keep model dur.g1. I think that since whole.mean exlains deviation in the data for both models, it is more important to include treatment as our primary explanatory variable.
em.dur<- emmeans(dur.g1, "treatment")
pairs(em.dur)
## contrast estimate SE df z.ratio p.value
## treatment1 - treatment2 0.2256 0.105 Inf 2.151 0.1987
## treatment1 - treatment3 0.1464 0.107 Inf 1.366 0.6497
## treatment1 - treatment4 0.3084 0.104 Inf 2.965 0.0252
## treatment1 - treatment5 0.3383 0.107 Inf 3.169 0.0133
## treatment2 - treatment3 -0.0793 0.104 Inf -0.763 0.9410
## treatment2 - treatment4 0.0828 0.104 Inf 0.798 0.9314
## treatment2 - treatment5 0.1126 0.105 Inf 1.075 0.8197
## treatment3 - treatment4 0.1621 0.105 Inf 1.542 0.5353
## treatment3 - treatment5 0.1919 0.105 Inf 1.831 0.3556
## treatment4 - treatment5 0.0298 0.106 Inf 0.281 0.9986
##
## Results are averaged over the levels of: qro
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(em.dur, comparisons = TRUE)
dur.sum <- duration %>%
group_by(treatment) %>%
summarise(cdur.m = mean(duration),
dur.sd = sd(duration),
dur.n = length(duration)) %>%
mutate(sedur = dur.sd/sqrt(dur.n))
dur.sum
## # A tibble: 5 × 5
## treatment cdur.m dur.sd dur.n sedur
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 45 7.21 9 2.40
## 2 2 42 3.16 9 1.05
## 3 3 44.6 5.08 9 1.69
## 4 4 39.3 4.87 9 1.62
## 5 5 41.4 9.96 9 3.32
durtuk.means <- emmeans(object = dur.g1,
specs = "treatment",
adjust = "Tukey",
type = "response")
durtuk.means
## treatment response SE df asymp.LCL asymp.UCL
## 1 2127 172 Inf 1727 2619
## 2 1697 133 Inf 1388 2075
## 3 1837 144 Inf 1503 2246
## 4 1562 125 Inf 1272 1919
## 5 1516 117 Inf 1245 1847
##
## Results are averaged over the levels of: qro
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## Intervals are back-transformed from the log scale
dur.cld.model <- cld(object = durtuk.means,
adjust = "Tukey",
Letters = letters,
alpha = 0.05)
dur.cld.model
## treatment response SE df asymp.LCL asymp.UCL .group
## 5 1516 117 Inf 1245 1847 a
## 4 1562 125 Inf 1272 1919 a
## 2 1697 133 Inf 1388 2075 ab
## 3 1837 144 Inf 1503 2246 ab
## 1 2127 172 Inf 1727 2619 b
##
## Results are averaged over the levels of: qro
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## Intervals are back-transformed from the log scale
## P value adjustment: tukey method for comparing a family of 5 estimates
## Tests are performed on the log scale
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
durtuk.means <- as.data.frame(durtuk.means)
dur_max <- duration %>%
group_by(treatment) %>%
summarize(maxdur = max((duration)))
dur_for_plotting <- full_join(durtuk.means, dur_max,
by="treatment")
dur_for_plotting$maxse <- dur_for_plotting$response + dur_for_plotting$SE
ggplot(data = dur.sum, aes(x = treatment, y = cdur.m, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(5, 53)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = cdur.m-sedur,
ymax = cdur.m+sedur),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Days",) +
ggtitle("Average Colony Duration per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 30) +
theme(legend.position = "none") +
annotate(geom = "text",
x = 1, y = 51.5,
label = "P < 0.05",
size = 12) +
annotate(geom = "text",
x = c(5, 4, 2, 3, 1),
y = c(45.5, 41.7, 44, 47, 48.3),
label = c("a", "a", "ab", "ab", "b"),
size = 12) +
theme(legend.position = "none")
dur_for_plotting
## treatment response SE df asymp.LCL asymp.UCL maxdur maxse
## 1 2127 172 Inf 1727 2619 58 2299
## 2 1697 133 Inf 1388 2075 47 1830
## 3 1837 144 Inf 1503 2246 55 1981
## 4 1562 125 Inf 1272 1919 46 1687
## 5 1516 117 Inf 1245 1847 58 1633
##
## Results are averaged over the levels of: qro
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## Intervals are back-transformed from the log scale