pollen <- read_csv("pollen1.csv", col_types = cols(round = col_factor(levels = c("1",
"2")), treatment = col_factor(levels = c("1",
"2", "3", "4", "5")), replicate = col_factor(levels = c("1",
"2", "3", "4", "5", "6", "7", "9", "11",
"12")), start_date = col_date(format = "%m/%d/%Y"),
start_time = col_character(), end_date = col_date(format = "%m/%d/%Y"),
end_time = col_character()))
pollen$colony <- as.factor(pollen$colony)
pollen$pid <- as.factor(pollen$pid)
pollen$count <- as.factor(pollen$count)
pollen$whole_dif <- as.double(pollen$whole_dif)
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] -0.98780 1.56542
# get rid of negative numbers
pollen$difference[pollen$difference < 0] <- NA
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] 0.002715 1.565420
# add queenright original colony column
qro <- read_csv("qro.csv")
qro$colony <- as.factor(qro$colony)
qro$qro <- as.factor(qro$qro)
pollen <- merge(pollen, qro, by.x = "colony")
Let’s look at the shape of the pollen data in a histogram.
shapiro.test(pollen$difference)
##
## Shapiro-Wilk normality test
##
## data: pollen$difference
## W = 0.86822, p-value < 2.2e-16
pollen$boxp <- bcPower(pollen$difference, -2, gamma=1)
shapiro.test(pollen$boxp)
##
## Shapiro-Wilk normality test
##
## data: pollen$boxp
## W = 0.96065, p-value < 2.2e-16
ggplot(pollen, aes(x=boxp, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.009,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Pollen Consumption (g) - BoxCox Transformed") +
labs(y = "Number of Pollen Balls", x = "Pollen Consumed (g), BoxCox power transformation")
pmod <- glm(difference ~ treatment + bees_alive + round + qro + start_date, data = pollen)
summary(pmod)
##
## Call:
## glm(formula = difference ~ treatment + bees_alive + round + qro +
## start_date, data = pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.63819 -0.20716 -0.07461 0.13856 1.02860
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.314e+02 1.249e+01 -10.521 < 2e-16 ***
## treatment2 5.457e-02 2.792e-02 1.954 0.050874 .
## treatment3 8.980e-02 2.799e-02 3.209 0.001369 **
## treatment4 7.736e-02 2.803e-02 2.760 0.005875 **
## treatment5 1.622e-02 2.817e-02 0.576 0.564819
## bees_alive 1.372e-01 1.579e-02 8.694 < 2e-16 ***
## round2 -2.782e+00 2.650e-01 -10.496 < 2e-16 ***
## qroB3 1.152e-01 3.580e-02 3.218 0.001324 **
## qroB4 3.198e-01 3.539e-02 9.037 < 2e-16 ***
## qroB5 9.208e-02 2.546e-02 3.616 0.000311 ***
## qroK1 1.606e-01 7.224e-02 2.223 0.026393 *
## qroK2/K1 -1.635e-01 8.403e-02 -1.946 0.051885 .
## qroK3 -1.732e-01 9.760e-02 -1.774 0.076274 .
## qroK3/K2 NA NA NA NA
## start_date 6.961e-03 6.615e-04 10.522 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.09332232)
##
## Null deviance: 140.47 on 1235 degrees of freedom
## Residual deviance: 114.04 on 1222 degrees of freedom
## AIC: 592.12
##
## Number of Fisher Scoring iterations: 2
Anova(pmod)
## Analysis of Deviance Table (Type II tests)
##
## Response: difference
## LR Chisq Df Pr(>Chisq)
## treatment 15.211 4 0.004282 **
## bees_alive 75.577 1 < 2.2e-16 ***
## round 0
## qro 145.034 6 < 2.2e-16 ***
## start_date 110.719 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(pmod)
pbox <- glm(boxp ~ treatment + bees_alive + round + qro + start_date, data = pollen)
summary(pbox)
##
## Call:
## glm(formula = boxp ~ treatment + bees_alive + round + qro + start_date,
## data = pollen)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.23456 -0.05390 -0.01050 0.05201 0.19748
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.756e+01 3.240e+00 -11.594 < 2e-16 ***
## treatment2 2.091e-02 7.240e-03 2.888 0.003948 **
## treatment3 2.803e-02 7.258e-03 3.862 0.000119 ***
## treatment4 2.317e-02 7.270e-03 3.187 0.001473 **
## treatment5 4.344e-03 7.304e-03 0.595 0.552138
## bees_alive 3.773e-02 4.094e-03 9.215 < 2e-16 ***
## round2 -8.164e-01 6.873e-02 -11.879 < 2e-16 ***
## qroB3 3.049e-02 9.283e-03 3.284 0.001051 **
## qroB4 7.914e-02 9.178e-03 8.623 < 2e-16 ***
## qroB5 2.269e-02 6.603e-03 3.436 0.000610 ***
## qroK1 2.886e-02 1.873e-02 1.541 0.123689
## qroK2/K1 -5.243e-02 2.179e-02 -2.406 0.016284 *
## qroK3 -5.442e-02 2.531e-02 -2.150 0.031727 *
## qroK3/K2 NA NA NA NA
## start_date 1.996e-03 1.715e-04 11.636 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.006275726)
##
## Null deviance: 9.7775 on 1235 degrees of freedom
## Residual deviance: 7.6689 on 1222 degrees of freedom
## AIC: -2744.3
##
## Number of Fisher Scoring iterations: 2
Anova(pbox)
## Analysis of Deviance Table (Type II tests)
##
## Response: boxp
## LR Chisq Df Pr(>Chisq)
## treatment 23.266 4 0.000112 ***
## bees_alive 84.925 1 < 2.2e-16 ***
## round 0
## qro 132.892 6 < 2.2e-16 ***
## start_date 135.387 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(pbox)
boxemm <- emmeans(pbox, "treatment")
pairs(boxemm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 -0.02091 0.00724 1222 -2.888 0.0322
## treatment1 - treatment3 -0.02803 0.00726 1222 -3.862 0.0011
## treatment1 - treatment4 -0.02317 0.00727 1222 -3.187 0.0128
## treatment1 - treatment5 -0.00434 0.00730 1222 -0.595 0.9759
## treatment2 - treatment3 -0.00712 0.00737 1222 -0.965 0.8706
## treatment2 - treatment4 -0.00226 0.00750 1222 -0.302 0.9982
## treatment2 - treatment5 0.01656 0.00723 1222 2.290 0.1486
## treatment3 - treatment4 0.00486 0.00752 1222 0.646 0.9673
## treatment3 - treatment5 0.02368 0.00744 1222 3.185 0.0129
## treatment4 - treatment5 0.01883 0.00755 1222 2.494 0.0927
##
## Results are averaged over the levels of: qro, round
## P value adjustment: tukey method for comparing a family of 5 estimates
Even though the histogram for the boxcox transformation doesn’t look all that great, the W value is greatly improved and the diagnostic plots for the model look pretty good. The q-q plot and residuals v fitted looked relatively well fitting and evenly spread.
polsum <- pollen %>%
group_by(treatment) %>%
summarise(mp = mean(difference),
sdp = sd(difference),
np = length(difference)) %>%
mutate(sep = sdp/sqrt(np))
polsum
## # A tibble: 5 × 5
## treatment mp sdp np sep
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 0.461 0.336 252 0.0212
## 2 2 0.508 0.327 242 0.0210
## 3 3 0.561 0.350 251 0.0221
## 4 4 0.514 0.345 251 0.0218
## 5 5 0.459 0.319 240 0.0206
ggplot(data = polsum, aes(x = treatment, y = mp, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(0.4,0.6)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = mp - sep,
ymax = mp + sep),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Pollen Consumed (g)",) +
ggtitle("Average Pollen Consumed (g) per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 20)
sum2 <- pollen %>%
group_by(treatment, count) %>%
summarise(mp = mean(difference),
sdp = sd(difference),
np = length(difference)) %>%
mutate(sep = sdp/sqrt(np))
sum2
## # A tibble: 136 × 6
## # Groups: treatment [5]
## treatment count mp sdp np sep
## <fct> <fct> <dbl> <dbl> <int> <dbl>
## 1 1 2 0.299 0.0710 11 0.0214
## 2 1 3 0.268 0.0863 13 0.0239
## 3 1 4 0.257 0.0993 14 0.0265
## 4 1 5 0.192 0.110 12 0.0318
## 5 1 6 0.221 0.106 10 0.0336
## 6 1 7 0.394 0.338 11 0.102
## 7 1 8 0.336 0.173 11 0.0521
## 8 1 9 0.420 0.228 10 0.0722
## 9 1 10 0.481 0.342 8 0.121
## 10 1 11 0.611 0.429 9 0.143
## # … with 126 more rows
ggplot(data = sum2, aes(x = count, y = mp)) +
geom_point(aes(color = treatment)) +
scale_color_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
labs(y = "Mean Pollen Consumed (g)", x = "Pollen Ball Number") +
ggtitle("Average Pollen Consumed (g) per Treatment") +
theme_cowplot()+
theme_classic(base_size = 16) +
facet_grid(vars(treatment))