Start by imputing pollen data and creating a new data frame with average pollen consumption on a per-colony basis
### Figure out average pollen consumption by treatment
pollen <- read_csv("pollen1.csv", col_types = cols(round = col_factor(levels = c("1",
"2")), treatment = col_factor(levels = c("1",
"2", "3", "4", "5")), replicate = col_factor(levels = c("1",
"2", "3", "4", "5", "6", "7", "9", "11",
"12")), start_date = col_date(format = "%m/%d/%Y"),
start_time = col_character(), end_date = col_date(format = "%m/%d/%Y"),
end_time = col_character()))
pollen$colony <- as.factor(pollen$colony)
pollen$pid <- as.factor(pollen$pid)
pollen$count <- as.factor(pollen$count)
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] -0.98780 1.56542
# get rid of negative numbers
pollen$difference[pollen$difference < 0] <- NA
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] 0.002715 1.565420
pollen$whole_dif <- as.numeric(pollen$difference)
wd.1 <- lm(difference ~ treatment, data = pollen)
summary(wd.1)
##
## Call:
## lm(formula = difference ~ treatment, data = pollen)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5268 -0.2476 -0.1363 0.1874 1.0576
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.460679 0.021143 21.789 < 2e-16 ***
## treatment2 0.047174 0.030208 1.562 0.118630
## treatment3 0.100480 0.029931 3.357 0.000812 ***
## treatment4 0.053390 0.029931 1.784 0.074703 .
## treatment5 -0.001879 0.030272 -0.062 0.950524
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3356 on 1231 degrees of freedom
## Multiple R-squared: 0.01281, Adjusted R-squared: 0.009604
## F-statistic: 3.994 on 4 and 1231 DF, p-value: 0.003177
wd.emm <- emmeans(wd.1, "treatment")
summary(wd.emm)
## treatment emmean SE df lower.CL upper.CL
## 1 0.461 0.0211 1231 0.419 0.502
## 2 0.508 0.0216 1231 0.466 0.550
## 3 0.561 0.0212 1231 0.520 0.603
## 4 0.514 0.0212 1231 0.473 0.556
## 5 0.459 0.0217 1231 0.416 0.501
##
## Confidence level used: 0.95
wd.summary <- pollen %>%
group_by(colony) %>%
summarize(whole.mean = mean(difference))
as.data.frame(wd.summary) # this is the data frame I will merge with subsequent data frames to contain average pollen consumption per colony as a new column
## colony whole.mean
## 1 1.11R2 0.2829509
## 2 1.12R2 0.1697964
## 3 1.1R1 0.8049667
## 4 1.1R2 0.5213458
## 5 1.2R1 0.4704294
## 6 1.2R2 0.3374200
## 7 1.3R1 0.3910053
## 8 1.3R2 0.4512891
## 9 1.4R2 0.6063016
## 10 1.5R2 0.7079516
## 11 1.7R2 0.7400381
## 12 1.9R2 0.2240081
## 13 2.11R2 0.4178270
## 14 2.12R2 0.4035568
## 15 2.1R1 0.7282895
## 16 2.1R2 0.6101589
## 17 2.2R1 0.4663045
## 18 2.2R2 0.5109234
## 19 2.3R1 0.4052000
## 20 2.3R2 0.3280036
## 21 2.4R2 0.3881394
## 22 2.5R2 0.7194915
## 23 2.7R2 0.5299685
## 24 2.9R2 0.5857152
## 25 3.11R2 0.4216410
## 26 3.12R2 0.3390993
## 27 3.1R1 0.8014682
## 28 3.1R2 0.3711948
## 29 3.2R1 0.8020500
## 30 3.2R2 0.3461010
## 31 3.3R1 0.5873429
## 32 3.3R2 0.8465806
## 33 3.4R2 0.4120433
## 34 3.5R2 0.8906211
## 35 3.7R2 0.6266680
## 36 3.9R2 0.4409511
## 37 4.11R2 0.3456011
## 38 4.12R2 0.2496295
## 39 4.1R1 0.8837867
## 40 4.1R2 0.7074755
## 41 4.2R1 0.3319238
## 42 4.2R2 0.3871742
## 43 4.3R1 0.3944500
## 44 4.3R2 0.5800074
## 45 4.4R2 0.8356247
## 46 4.5R2 0.2335967
## 47 4.7R2 0.6237400
## 48 4.9R2 0.5322950
## 49 5.11R2 0.4113960
## 50 5.12R2 0.2077741
## 51 5.1R1 0.6799737
## 52 5.1R2 0.3782286
## 53 5.2R1 0.7140056
## 54 5.2R2 0.4912468
## 55 5.3R1 0.2857654
## 56 5.3R2 0.2128778
## 57 5.4R2 0.4563045
## 58 5.5R2 0.7095479
## 59 5.7R2 0.6113189
## 60 5.9R2 0.4188290
drones <- read_csv("drones_rf.csv", col_types = cols(round = col_factor(levels = c("1","2")), treatment = col_factor(levels = c("1","2", "3", "4", "5")), notes = col_skip(), colony_start = col_skip(), colony_count = col_skip(), emerge_date = col_skip()))
drones$order_on_sheet <- as.factor(drones$order_on_sheet)
drones$replicate <- as.factor(drones$replicate)
drones$colony <- as.factor(drones$colony)
drones$id <- as.factor(drones$id)
drones$alive <- as.logical(drones$`alive?`)
drones$relative_fat <- as.double(drones$relative_fat)
drones$radial <- as.double(drones$radial)
drones <- na.omit(drones)
drf.pollen <- merge(wd.summary, drones, by.x = "colony") # this is a new data frame with average pollen consumption data per colony
hist(drf.pollen$radial)
shapiro.test(drf.pollen$radial)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$radial
## W = 0.98282, p-value = 7.268e-05
Data is normal enough, use lmer
I also used ranova here after selecting the best model based on AIC and it looks like inclusing colony as a random effect is important
whole.mean = the average pollen consumption per colony after controlling for evaporative control differences
rad.g1 <- lmer(radial~ treatment + whole.mean + alive + emerge_time + round + workers_alive + (1|colony), data = drf.pollen)
rad.g2 <- lmer(radial~ treatment + whole.mean + alive + emerge_time + round + (1|colony), data = drf.pollen)
rad.g3 <- lmer(radial~ treatment + whole.mean + alive + round + workers_alive + (1|colony), data = drf.pollen)
rad.g4 <- lmer(radial~ treatment + whole.mean + emerge_time + round + workers_alive + (1|colony), data = drf.pollen)
rad.g5 <- lmer(radial~ treatment + whole.mean + alive + round + (1|colony), data = drf.pollen)
AIC(rad.g1, rad.g2, rad.g3, rad.g4, rad.g5) # get rid of emerge time + workers_alive
## df AIC
## rad.g1 12 -111.8519
## rad.g2 11 -119.2878
## rad.g3 11 -122.9950
## rad.g4 11 -111.2395
## rad.g5 10 -130.3781
summary(rad.g5)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: radial ~ treatment + whole.mean + alive + round + (1 | colony)
## Data: drf.pollen
##
## REML criterion at convergence: -150.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0107 -0.5305 0.0470 0.5730 3.6316
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 0.007071 0.08409
## Residual 0.034469 0.18566
## Number of obs: 418, groups: colony, 51
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.29774 0.12037 147.33029 19.089 <2e-16 ***
## treatment2 -0.03262 0.05030 29.00334 -0.648 0.5218
## treatment3 -0.09132 0.05276 31.86779 -1.731 0.0932 .
## treatment4 -0.03111 0.05215 27.04787 -0.597 0.5557
## treatment5 -0.04735 0.05106 26.57095 -0.927 0.3621
## whole.mean 0.06095 0.10248 32.61748 0.595 0.5561
## aliveTRUE 0.24241 0.10198 388.74411 2.377 0.0179 *
## round2 -0.06999 0.04646 55.37831 -1.506 0.1376
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtmn2 trtmn3 trtmn4 trtmn5 whl.mn alTRUE
## treatment2 -0.225
## treatment3 -0.214 0.515
## treatment4 -0.131 0.520 0.503
## treatment5 -0.197 0.536 0.504 0.513
## whole.mean -0.427 0.019 -0.124 -0.084 0.026
## aliveTRUE -0.729 -0.033 0.054 -0.041 -0.061 -0.115
## round2 -0.323 0.055 0.034 -0.041 0.037 0.185 -0.122
anova(rad.g5)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## treatment 0.110023 0.027506 4 28.43 0.7980 0.53652
## whole.mean 0.012194 0.012194 1 32.62 0.3538 0.55610
## alive 0.194773 0.194773 1 388.74 5.6506 0.01793 *
## round 0.078225 0.078225 1 55.38 2.2694 0.13763
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ranova(rad.g5)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## radial ~ treatment + whole.mean + alive + round + (1 | colony)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 10 75.189 -130.38
## (1 | colony) 9 67.087 -116.17 16.203 1 5.69e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(resid(rad.g5)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(rad.g5));qqline(resid(rad.g5))
rad.emm <- emmeans(rad.g5, "treatment")
?emmeans::summary.emmGrid
pairs(rad.emm, adjust = "tukey")
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.03262 0.0505 38.0 0.646 0.9664
## treatment1 - treatment3 0.09132 0.0530 41.7 1.722 0.4320
## treatment1 - treatment4 0.03111 0.0524 35.5 0.593 0.9752
## treatment1 - treatment5 0.04735 0.0512 34.9 0.924 0.8856
## treatment2 - treatment3 0.05870 0.0510 42.1 1.151 0.7787
## treatment2 - treatment4 -0.00151 0.0505 36.4 -0.030 1.0000
## treatment2 - treatment5 0.01473 0.0489 34.9 0.301 0.9981
## treatment3 - treatment4 -0.06021 0.0526 38.9 -1.145 0.7815
## treatment3 - treatment5 -0.04397 0.0519 39.3 -0.847 0.9141
## treatment4 - treatment5 0.01624 0.0511 33.8 0.318 0.9977
##
## Results are averaged over the levels of: alive, round
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
summary(rad.emm)
## treatment emmean SE df lower.CL upper.CL
## 1 2.42 0.0620 162 2.30 2.54
## 2 2.39 0.0609 165 2.27 2.51
## 3 2.33 0.0591 178 2.21 2.44
## 4 2.39 0.0639 144 2.26 2.51
## 5 2.37 0.0629 150 2.25 2.50
##
## Results are averaged over the levels of: alive, round
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
plot(rad.emm, comparisons = TRUE)
The blue bars on the plot emmeans are the confidence intervals. The red arrow lines represent a scheme to determine homogeneous groups. If the red lines overlap for two groups, then they are not significantly different using the method chosen.
Based on the diagnostic plots I am going to make the decision that this model fits pretty well.
The consensus from what I’ve seen online is that a) you shouldn’t make choices about random effects based on stats, you should so based on your actual study design/ the way the system works, b) aov uses F tests for everything not t tests which gives completely different results. Also, the ranova test I ran above shows that the random variable is important to the model so we should stick with lmer and go from there.
aov results show that treatment sig impacts radial length (anova = 0.01837) and treatment 3 differs from treatment 1 (p = 0.0064) and from treatment 4 (p = 0.045).
aov.rad <- aov(radial~ treatment + whole.mean + alive + round + colony, data = drf.pollen)
aov.rad1 <- aov(radial~ treatment + whole.mean, data = drf.pollen)
AIC(aov.rad, rad.g2) #aov better
## df AIC
## aov.rad 53 -178.2168
## rad.g2 11 -119.2878
AIC(aov.rad1, aov.rad) # better with "alive" included
## df AIC
## aov.rad1 7 -151.1315
## aov.rad 53 -178.2168
summary(aov.rad)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 4 0.530 0.13256 3.913 0.0040 **
## whole.mean 1 0.023 0.02300 0.679 0.4105
## alive 1 0.196 0.19639 5.797 0.0165 *
## round 1 0.173 0.17297 5.106 0.0244 *
## colony 44 3.718 0.08449 2.494 2.05e-06 ***
## Residuals 366 12.400 0.03388
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(aov.rad)
## Anova Table (Type II tests)
##
## Response: radial
## Sum Sq Df F value Pr(>F)
## treatment 0
## whole.mean 0
## alive 0.0318 1 0.9398 0.333
## round 0
## colony 3.7178 44 2.4941 2.055e-06 ***
## Residuals 12.3995 366
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tukey_rad <- HSD.test(aov.rad, trt = "treatment",
console = TRUE)
##
## Study: aov.rad ~ "treatment"
##
## HSD Test for radial
##
## Mean Square Error: 0.03387854
##
## treatment, means
##
## radial std r Min Max
## 1 2.519668 0.1660432 79 2.0540 2.8720
## 2 2.469565 0.1843602 86 1.9708 2.8903
## 3 2.404999 0.2431685 69 1.7169 3.1542
## 4 2.492920 0.2323400 92 1.6924 3.0593
## 5 2.468763 0.1669673 92 2.0990 2.8642
##
## Alpha: 0.05 ; DF Error: 366
## Critical Value of Studentized Range: 3.876962
##
## Groups according to probability of means differences and alpha level( 0.05 )
##
## Treatments with the same letter are not significantly different.
##
## radial groups
## 1 2.519668 a
## 4 2.492920 a
## 2 2.469565 ab
## 5 2.468763 ab
## 3 2.404999 b
tidy_tukey.rad <- as.data.frame(tukey_rad$groups)
tidy_tukey.rad
## radial groups
## 1 2.519668 a
## 4 2.492920 a
## 2 2.469565 ab
## 5 2.468763 ab
## 3 2.404999 b
tukey.test.rad <- TukeyHSD(aov.rad, which = 'treatment')
tukey.test.rad
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = radial ~ treatment + whole.mean + alive + round + colony, data = drf.pollen)
##
## $treatment
## diff lwr upr p adj
## 2-1 -0.0501032382 -0.128738638 0.02853216 0.4066745
## 3-1 -0.1146698037 -0.197813947 -0.03152566 0.0017010
## 4-1 -0.0267487892 -0.104146746 0.05064917 0.8780511
## 5-1 -0.0509053110 -0.128303267 0.02649265 0.3733343
## 3-2 -0.0645665656 -0.146117882 0.01698475 0.1932997
## 4-2 0.0233544489 -0.052329844 0.09903874 0.9159913
## 5-2 -0.0008020728 -0.076486366 0.07488222 0.9999998
## 4-3 0.0879210145 0.007562227 0.16827980 0.0240049
## 5-3 0.0637644928 -0.016594295 0.14412328 0.1914095
## 5-4 -0.0241565217 -0.098554303 0.05024126 0.9004689
radial.summary <- drf.pollen %>%
group_by(treatment) %>%
summarize(mean.rad = mean(radial),
sd.rad = sd(radial),
n.rad = length(radial)) %>%
mutate(se.rad = sd.rad/sqrt(n.rad))
radial.summary
## # A tibble: 5 × 5
## treatment mean.rad sd.rad n.rad se.rad
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 2.52 0.166 79 0.0187
## 2 2 2.47 0.184 86 0.0199
## 3 3 2.40 0.243 69 0.0293
## 4 4 2.49 0.232 92 0.0242
## 5 5 2.47 0.167 92 0.0174
anova.rad <- aov(radial ~ treatment, data = drf.pollen)
tukey.rad <- TukeyHSD(anova.rad)
cld.rad <- multcompLetters4(anova.rad, tukey.rad)
cld.rad
## $treatment
## 1 4 2 5 3
## "a" "a" "ab" "ab" "b"
dt.rad <- as.data.frame(radial.summary)
dt.rad
## treatment mean.rad sd.rad n.rad se.rad
## 1 1 2.519668 0.1660432 79 0.01868132
## 2 2 2.469565 0.1843602 86 0.01988007
## 3 3 2.404999 0.2431685 69 0.02927405
## 4 4 2.492920 0.2323400 92 0.02422312
## 5 5 2.468763 0.1669673 92 0.01740755
dt.rad$n.rad <- as.double(dt.rad$n.rad)
cld.rad <- as.data.frame.list(cld.rad$treatment)
cld.rad
## Letters monospacedLetters LetterMatrix.a LetterMatrix.b
## 1 a a TRUE FALSE
## 4 a a TRUE FALSE
## 2 ab ab TRUE TRUE
## 5 ab ab TRUE TRUE
## 3 b b FALSE TRUE
dt.rad$cld.rad <- cld.rad$Letters
dt.rad
## treatment mean.rad sd.rad n.rad se.rad cld.rad
## 1 1 2.519668 0.1660432 79 0.01868132 a
## 2 2 2.469565 0.1843602 86 0.01988007 a
## 3 3 2.404999 0.2431685 69 0.02927405 ab
## 4 4 2.492920 0.2323400 92 0.02422312 ab
## 5 5 2.468763 0.1669673 92 0.01740755 b
range(dt.rad$mean.rad)
## [1] 2.404999 2.519668
ggplot(data = dt.rad, aes(x = treatment, y = mean.rad, fill = treatment)) +
geom_bar(stat = "identity")+
coord_cartesian(ylim=c(2, 2.6)) +
scale_fill_manual(values = c("gray0", "cadetblue2", "cadetblue3", "cadetblue4", "deepskyblue4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)",
"Treatment 2",
"Treatment 3", "Treatment 4",
"Treatment 5")) +
theme_cowplot()+
geom_errorbar(aes(ymin = mean.rad-se.rad,
ymax = mean.rad+se.rad)) +
ggtitle("Average Drone Radial Cell Length (mm) per Treatment") +
labs(y = "Radial Length(mm)") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_classic(base_size = 12) +
geom_text(aes(label = cld.rad, y = mean.rad + se.rad), vjust = -0.5) +
stat_compare_means(label.y = 2.59)
hist(drf.pollen$relative_fat)
shapiro.test(drf.pollen$relative_fat)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$relative_fat
## W = 0.72228, p-value < 2.2e-16
range(drf.pollen$relative_fat)
## [1] 0.0002 0.0112
which(drf.pollen$relative_fat %in% c(0.0112)) # same problem drone as dry weights
## [1] 68
drf.pollen <- drf.pollen[-65,]
range(drf.pollen$relative_fat)
## [1] 0.0002 0.0112
shapiro.test(drf.pollen$relative_fat)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$relative_fat
## W = 0.71899, p-value < 2.2e-16
hist(drf.pollen$relative_fat)
drf.pollen$logrf <- log(drf.pollen$relative_fat)
shapiro.test(drf.pollen$logrf)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen$logrf
## W = 0.94303, p-value = 1.454e-11
hist(drf.pollen$logrf) # this looks a bit *more* normal I guess, I'm going to stick with glmer though
non parametric tests
kruskal.test(relative_fat ~ treatment, data = drf.pollen) #very clear sig difference in at least one group
##
## Kruskal-Wallis rank sum test
##
## data: relative_fat by treatment
## Kruskal-Wallis chi-squared = 32.015, df = 4, p-value = 1.9e-06
after some more digging online I actually think we want to use LMER for everything regardless of normality
https://stats.oarc.ucla.edu/r/dae/mixed-effects-logistic-regression/
rfglmer <- glmer(logrf ~ treatment + whole.mean + alive + emerge_time + round + workers_alive + (1|colony), data = drf.pollen)
rf1 <- glmer(logrf ~ treatment + whole.mean + alive + emerge_time + round + (1|colony), data = drf.pollen)
rf2 <- glmer(logrf ~ treatment + whole.mean + alive + round + workers_alive + (1|colony), data = drf.pollen)
rf7 <- lmer(logrf ~ treatment + whole.mean + alive + round + workers_alive + (1|colony), data = drf.pollen)
rf3 <- lmer(logrf ~ treatment + whole.mean + round + workers_alive + (1|colony), data = drf.pollen)
rf6 <- glmer(logrf ~ treatment + whole.mean + round + workers_alive + (1|colony), data = drf.pollen)
rf4 <- glmer(logrf ~ treatment + whole.mean + (1|colony), data = drf.pollen)
rf5 <- glmer(logrf ~ treatment + whole.mean + round + (1|colony), data = drf.pollen)
AIC(rfglmer, rf1, rf2, rf3, rf4, rf5, rf6, rf7)
## df AIC
## rfglmer 12 530.5065
## rf1 11 523.3246
## rf2 11 522.8935
## rf3 10 536.0543
## rf4 8 546.7068
## rf5 9 529.2019
## rf6 10 536.0543
## rf7 11 522.8935
anova(rf2)
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## treatment 4 3.5836 0.8959 5.1365
## whole.mean 1 0.0546 0.0546 0.3128
## alive 1 3.9818 3.9818 22.8293
## round 1 3.8302 3.8302 21.9603
## workers_alive 1 0.0037 0.0037 0.0214
anova(rf7)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## treatment 2.7882 0.6971 4 37.39 3.9965 0.008501 **
## whole.mean 0.1546 0.1546 1 43.00 0.8866 0.351669
## alive 2.9047 2.9047 1 397.72 16.6540 5.422e-05 ***
## round 3.7619 3.7619 1 85.44 21.5686 1.226e-05 ***
## workers_alive 0.0037 0.0037 1 29.54 0.0214 0.884812
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ranova(rf2)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## logrf ~ treatment + whole.mean + alive + round + workers_alive + (1 | colony)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 11 -250.45 522.89
## (1 | colony) 10 -254.84 529.68 8.7871 1 0.003034 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(rf2) # even though AIC is lower, I will keep round in
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## treatment 4 3.5836 0.8959 5.1365
## whole.mean 1 0.0546 0.0546 0.3128
## alive 1 3.9818 3.9818 22.8293
## round 1 3.8302 3.8302 21.9603
## workers_alive 1 0.0037 0.0037 0.0214
plot(resid(rf2)) +
abline(h=0, col="red", lwd=2)
## integer(0)
plot(resid(rf7)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(rf2));qqline(resid(rf2))
qqnorm(resid(rf7));qqline(resid(rf7))
rf.emm <- emmeans(rf2, "treatment")
summary(rf.emm)
## treatment emmean SE df lower.CL upper.CL
## 1 -7.03 0.127 201 -7.28 -6.78
## 2 -7.09 0.126 203 -7.34 -6.84
## 3 -7.28 0.123 227 -7.53 -7.04
## 4 -7.14 0.132 168 -7.40 -6.88
## 5 -6.92 0.130 189 -7.18 -6.67
##
## Results are averaged over the levels of: alive, round
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
pairs(rf.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.0618 0.0918 33.9 0.674 0.9608
## treatment1 - treatment3 0.2530 0.0977 38.0 2.590 0.0924
## treatment1 - treatment4 0.1067 0.0956 30.7 1.116 0.7968
## treatment1 - treatment5 -0.1062 0.0931 32.3 -1.141 0.7836
## treatment2 - treatment3 0.1912 0.0932 39.8 2.051 0.2614
## treatment2 - treatment4 0.0449 0.0907 32.3 0.495 0.9873
## treatment2 - treatment5 -0.1680 0.0876 32.9 -1.917 0.3285
## treatment3 - treatment4 -0.1463 0.0950 33.4 -1.540 0.5449
## treatment3 - treatment5 -0.3592 0.0941 36.3 -3.816 0.0044
## treatment4 - treatment5 -0.2129 0.0906 28.6 -2.350 0.1589
##
## Results are averaged over the levels of: alive, round
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(rf.emm, comparisons = TRUE)
aov.rf <- aov(relative_fat ~ treatment + whole.mean + alive , data = drf.pollen)
summary(aov.rf)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 4 0.0000252 6.312e-06 7.141 1.45e-05 ***
## whole.mean 1 0.0000048 4.760e-06 5.385 0.0208 *
## alive 1 0.0000039 3.874e-06 4.382 0.0369 *
## Residuals 410 0.0003624 8.840e-07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tukey_rf <- HSD.test(aov.rf, trt = "treatment",
console = TRUE)
##
## Study: aov.rf ~ "treatment"
##
## HSD Test for relative_fat
##
## Mean Square Error: 8.839966e-07
##
## treatment, means
##
## relative_fat std r Min Max
## 1 0.002020513 0.0014526607 78 2e-04 0.0112
## 2 0.001611628 0.0005615983 86 4e-04 0.0036
## 3 0.001365217 0.0005751793 69 2e-04 0.0029
## 4 0.001668478 0.0007492017 92 4e-04 0.0057
## 5 0.002027174 0.0010904282 92 7e-04 0.0072
##
## Alpha: 0.05 ; DF Error: 410
## Critical Value of Studentized Range: 3.874884
##
## Groups according to probability of means differences and alpha level( 0.05 )
##
## Treatments with the same letter are not significantly different.
##
## relative_fat groups
## 5 0.002027174 a
## 1 0.002020513 a
## 4 0.001668478 ab
## 2 0.001611628 b
## 3 0.001365217 b
tidy_tukey.rf <- as.data.frame(tukey_rf$groups)
tidy_tukey.rf
## relative_fat groups
## 5 0.002027174 a
## 1 0.002020513 a
## 4 0.001668478 ab
## 2 0.001611628 b
## 3 0.001365217 b
tukey.test.rf <- TukeyHSD(aov.rf, which = 'treatment')
tukey.test.rf
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = relative_fat ~ treatment + whole.mean + alive, data = drf.pollen)
##
## $treatment
## diff lwr upr p adj
## 2-1 -4.088849e-04 -8.116895e-04 -6.080350e-06 0.0446627
## 3-1 -6.552954e-04 -1.081047e-03 -2.295443e-04 0.0002933
## 4-1 -3.520346e-04 -7.485428e-04 4.447369e-05 0.1087559
## 5-1 6.661093e-06 -3.898472e-04 4.031693e-04 0.9999990
## 3-2 -2.464105e-04 -6.627629e-04 1.699419e-04 0.4843849
## 4-2 5.685035e-05 -3.295485e-04 4.432492e-04 0.9944280
## 5-2 4.155460e-04 2.914714e-05 8.019449e-04 0.0279054
## 4-3 3.032609e-04 -1.070032e-04 7.135249e-04 0.2556767
## 5-3 6.619565e-04 2.516925e-04 1.072221e-03 0.0001225
## 5-4 3.586957e-04 -2.113505e-05 7.385264e-04 0.0745357
rf_box <- drf.pollen %>%
ggplot(aes(x = treatment, y = relative_fat, fill = treatment)) +
geom_boxplot(outlier.shape = NA) +
ggdist::geom_dots(side = "both", color = "black", alpha = 0.5) +
scale_fill_manual(values = c("gray0", "cadetblue2", "cadetblue3", "cadetblue4", "deepskyblue4"),name = "Pristine Level", labels = c("Treatment 1 (control)", "Treatment 2", "Treatment 3", "Treatment 4", "Treatment 5")) +
theme_cowplot()+
theme(legend.position = "none") +
labs(x = "Treatment",
y = "Drone Relative Fat (g)",
title = "Average Drone Relative Fat (g) by Treatment")
rf_max <- drf.pollen %>%
group_by(treatment) %>%
summarize(rf_max = max(relative_fat))
rf_max
## # A tibble: 5 × 2
## treatment rf_max
## <fct> <dbl>
## 1 1 0.0112
## 2 2 0.0036
## 3 3 0.0029
## 4 4 0.0057
## 5 5 0.0072
tidier_tukey.rf <- tidy_tukey.rf %>%
rownames_to_column() %>% # converts rownames to columns
rename(treatment = rowname) # renames the column now called rowname to species
# join
rf_for_plotting <- full_join(tidier_tukey.rf, rf_max,
by = "treatment")
rf_box +
stat_compare_means(method = "anova") +
geom_text(data = rf_for_plotting,
aes(x = treatment,
y = 0.01 + rf_max,
label = groups))
## Outliers?
You know what let’s look for outliers
boxplot.stats(drf.pollen$relative_fat)$out
## [1] 0.0002 0.0050 0.0031 0.0040 0.0034 0.0112 0.0071 0.0032 0.0038 0.0036
## [11] 0.0035 0.0002 0.0057 0.0032 0.0031 0.0041 0.0072 0.0056 0.0042 0.0032
## [21] 0.0060 0.0049 0.0046 0.0031 0.0041
out <- boxplot.stats(drf.pollen$relative_fat)$out
out_ind <- which(drf.pollen$relative_fat %in% c(out))
out_ind
## [1] 40 43 48 64 66 67 68 71 73 105 142 210 241 243 244 322 364 373 374
## [20] 378 379 385 388 395 399
temp.drfp<-drf.pollen[-c(40 , 42 , 49, 54, 63, 64, 67, 71, 78 ,108, 145, 210, 238 ,243 ,244 ,315 ,372 ,373, 378, 379 ,380 ,386 ,388 ,391 ,396), ]
hist(temp.drfp$relative_fat)
shapiro.test(temp.drfp$relative_fat) # okay well that's a lot of outliers but this looks way better
##
## Shapiro-Wilk normality test
##
## data: temp.drfp$relative_fat
## W = 0.81035, p-value < 2.2e-16
templmer <- lmer(relative_fat ~ treatment + whole.mean + alive + emerge_time + (1|round/colony), data = temp.drfp)
templmer2 <- lmer(relative_fat ~ treatment + alive + emerge_time + (1|round/colony), data = temp.drfp)
summary(templmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: relative_fat ~ treatment + whole.mean + alive + emerge_time +
## (1 | round/colony)
## Data: temp.drfp
##
## REML criterion at convergence: -4409.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.8725 -0.5637 -0.1253 0.3321 7.1989
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony:round (Intercept) 2.396e-09 4.895e-05
## round (Intercept) 2.192e-07 4.682e-04
## Residual 5.460e-07 7.389e-04
## Number of obs: 392, groups: colony:round, 51; round, 2
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.142e-05 6.799e-04 1.530e+01 0.032 0.975275
## treatment2 -2.384e-04 1.236e-04 3.042e+01 -1.929 0.063122 .
## treatment3 -5.008e-04 1.317e-04 3.244e+01 -3.802 0.000599 ***
## treatment4 -2.909e-04 1.250e-04 2.546e+01 -2.327 0.028203 *
## treatment5 6.923e-05 1.252e-04 3.123e+01 0.553 0.584253
## whole.mean 5.762e-04 2.645e-04 3.801e+01 2.178 0.035650 *
## aliveTRUE 7.376e-04 3.800e-04 3.807e+02 1.941 0.052980 .
## emerge_time 1.359e-05 1.025e-05 5.923e+01 1.325 0.190287
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) trtmn2 trtmn3 trtmn4 trtmn5 whl.mn alTRUE
## treatment2 -0.154
## treatment3 -0.064 0.508
## treatment4 -0.167 0.547 0.514
## treatment5 -0.186 0.554 0.488 0.549
## whole.mean -0.315 0.036 -0.178 -0.084 0.097
## aliveTRUE -0.497 -0.023 0.052 -0.032 -0.063 -0.113
## emerge_time -0.652 0.087 -0.038 0.177 0.158 0.246 -0.027
Anova(templmer)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: relative_fat
## Chisq Df Pr(>Chisq)
## treatment 25.4878 4 4.013e-05 ***
## whole.mean 4.7452 1 0.02938 *
## alive 3.7680 1 0.05224 .
## emerge_time 1.7554 1 0.18520
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(templmer, templmer2) #without outliers it seems like pollen consumption is less important
## df AIC
## templmer 11 -4387.282
## templmer2 10 -4399.428
temp.emm <- emmeans(templmer2, "treatment")
summary(temp.emm)
## treatment emmean SE df lower.CL upper.CL
## 1 0.001218 0.000367 1.99 -0.000371 0.00281
## 2 0.000977 0.000365 1.94 -0.000642 0.00260
## 3 0.000776 0.000363 1.91 -0.000859 0.00241
## 4 0.000958 0.000369 2.01 -0.000618 0.00253
## 5 0.001270 0.000367 2.00 -0.000314 0.00285
##
## Results are averaged over the levels of: alive
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
pairs(temp.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 2.41e-04 0.000133 33.4 1.817 0.3812
## treatment1 - treatment3 4.42e-04 0.000139 34.3 3.173 0.0247
## treatment1 - treatment4 2.59e-04 0.000134 28.2 1.931 0.3249
## treatment1 - treatment5 -5.18e-05 0.000133 32.6 -0.389 0.9949
## treatment2 - treatment3 2.01e-04 0.000133 34.1 1.510 0.5632
## treatment2 - treatment4 1.86e-05 0.000127 27.2 0.146 0.9999
## treatment2 - treatment5 -2.93e-04 0.000126 31.9 -2.330 0.1619
## treatment3 - treatment4 -1.82e-04 0.000136 29.0 -1.338 0.6704
## treatment3 - treatment5 -4.94e-04 0.000134 33.3 -3.677 0.0069
## treatment4 - treatment5 -3.11e-04 0.000126 25.2 -2.469 0.1298
##
## Results are averaged over the levels of: alive
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
aov.temp <- aov(relative_fat ~ treatment + whole.mean + alive , data = temp.drfp)
aov.temp2 <- aov(relative_fat ~ treatment + alive , data = temp.drfp)
summary(aov.temp)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 4 1.441e-05 3.602e-06 6.173 8.01e-05 ***
## whole.mean 1 1.030e-06 1.031e-06 1.768 0.18448
## alive 1 4.100e-06 4.104e-06 7.034 0.00833 **
## Residuals 385 2.247e-04 5.840e-07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(aov.temp, aov.temp2, templmer2) # aov2 looks better
## df AIC
## aov.temp 8 -4505.470
## aov.temp2 7 -4506.251
## templmer2 10 -4399.428
tukey_temp <- HSD.test(aov.temp, trt = "treatment",
console = TRUE)
##
## Study: aov.temp ~ "treatment"
##
## HSD Test for relative_fat
##
## Mean Square Error: 5.835009e-07
##
## treatment, means
##
## relative_fat std r Min Max
## 1 0.001901449 0.0009912103 69 4e-04 0.0071
## 2 0.001601190 0.0005628015 84 4e-04 0.0036
## 3 0.001382353 0.0005614324 68 3e-04 0.0029
## 4 0.001635227 0.0007317181 88 4e-04 0.0057
## 5 0.001908434 0.0009172074 83 7e-04 0.0072
##
## Alpha: 0.05 ; DF Error: 385
## Critical Value of Studentized Range: 3.876006
##
## Groups according to probability of means differences and alpha level( 0.05 )
##
## Treatments with the same letter are not significantly different.
##
## relative_fat groups
## 5 0.001908434 a
## 1 0.001901449 a
## 4 0.001635227 ab
## 2 0.001601190 ab
## 3 0.001382353 b
tidy_tukey.temp <- as.data.frame(tukey_temp$groups)
tidy_tukey.temp
## relative_fat groups
## 5 0.001908434 a
## 1 0.001901449 a
## 4 0.001635227 ab
## 2 0.001601190 ab
## 3 0.001382353 b
tukey.test.temp <- TukeyHSD(aov.temp, which = 'treatment')
tukey.test.temp
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = relative_fat ~ treatment + whole.mean + alive, data = temp.drfp)
##
## $treatment
## diff lwr upr p adj
## 2-1 -3.002588e-04 -6.404098e-04 3.989218e-05 0.1123329
## 3-1 -5.190963e-04 -8.768396e-04 -1.613531e-04 0.0007907
## 4-1 -2.662220e-04 -6.028685e-04 7.042453e-05 0.1943859
## 5-1 6.984460e-06 -3.340894e-04 3.480583e-04 0.9999977
## 3-2 -2.188375e-04 -5.603589e-04 1.226838e-04 0.4009093
## 4-2 3.403680e-05 -2.853180e-04 3.533916e-04 0.9984071
## 5-2 3.072433e-04 -1.677519e-05 6.312617e-04 0.0725406
## 4-3 2.528743e-04 -8.515680e-05 5.909055e-04 0.2442035
## 5-3 5.260808e-04 1.836403e-04 8.685213e-04 0.0003055
## 5-4 2.732065e-04 -4.713110e-05 5.935440e-04 0.1353745
range(temp.drfp$relative_fat)
## [1] 0.0003 0.0072
temp_box <- temp.drfp %>%
ggplot(aes(x = treatment, y = relative_fat, fill = treatment)) +
geom_boxplot(outlier.shape = NA) +
ggdist::geom_dots(side = "both", color = "black", alpha = 0.5) +
scale_fill_manual(values = c("gray0", "cadetblue2", "cadetblue3", "cadetblue4", "deepskyblue4"),name = "Pristine Level", labels = c("Treatment 1 (control)", "Treatment 2", "Treatment 3", "Treatment 4", "Treatment 5")) +
theme_cowplot()+
coord_cartesian(ylim=c(0, .0035)) +
theme(legend.position = "none") +
labs(x = "Treatment",
y = "Drone Relative Fat (g)",
title = "Average Drone Relative Fat (g) by Treatment")
temp_max <- temp.drfp %>%
group_by(treatment) %>%
summarize(temp_max = max(relative_fat))
temp_max
## # A tibble: 5 × 2
## treatment temp_max
## <fct> <dbl>
## 1 1 0.0071
## 2 2 0.0036
## 3 3 0.0029
## 4 4 0.0057
## 5 5 0.0072
tidier_tukey.temp <- tidy_tukey.temp %>%
rownames_to_column() %>% # converts rownames to columns
rename(treatment = rowname) # renames the column now called rowname to species
# join
temp_for_plotting <- full_join(tidier_tukey.temp, temp_max,
by = "treatment")
temp_box +
stat_compare_means(method = "anova", aes(y = 0.0005)) +
geom_text(data = temp_for_plotting,
aes(x = treatment,
y = .0003 + temp_max,
label = groups))
I don’t know this doesn’t look any more compact I don’t know that removing those data points really helped with distribution that much. I do think that treatment 3 clearly differs from treatments 1 and 5 though we just need to figure out the right test to show that.