Bees are frequently exposed to fungicides in agricultural landscapes, and while these chemicals are generally not considered to be harmful to insect pollinators, the sublethal effects of fungicides are not well understood. We investigated the non-target effects of exposure to field-realistic concentrations of the fungicide, Pristine®, for the common eastern bumble bee (Bombus impatiens). We fed queenless microcolonies of 5 bumble bees pollen patties contaminated with Pristine® ranging from 0 ppb-150,000 ppb, and evaluated the effects of chronic fungicide exposure on brood production, colony weight change, pollen consumption, and drone body quality. We found that while microcolonies fed pollen containing 1,500 ppb consumed the most pollen and produced the most brood, microcolonies exposed to 15,000 ppb gained weight at significantly lower rates. Fungicide exposure also negatively impacted brood development time and drone body quality. Our findings indicate that Pristine® has sublethal effects on bumble bee microcolony growth and brood development, highlighting the potential negative impacts of chronic fungicide exposure to bee health. In the face of the global decline of pollinator species, it is important for us to broaden our understanding of how bees react towards exposure of often overlooked stressors, such as fungicides, to account for potential consequences for pollinator health and pollination services.
### Figure out average pollen consumption by treatment
pollen <- read_csv("pollen1.csv", col_types = cols(round = col_factor(levels = c("1",
"2")), treatment = col_factor(levels = c("1",
"2", "3", "4", "5")), replicate = col_factor(levels = c("1",
"2", "3", "4", "5", "6", "7", "9", "11",
"12")), start_date = col_date(format = "%m/%d/%Y"),
start_time = col_character(), end_date = col_date(format = "%m/%d/%Y"),
end_time = col_character()))
pollen$colony <- as.factor(pollen$colony)
pollen$pid <- as.factor(pollen$pid)
pollen$count <- as.factor(pollen$count)
pollen <- subset(pollen, pollen$round != 1)
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] -0.30646 1.56542
# get rid of negative numbers
pollen$difference[pollen$difference < 0] <- NA
pollen <- na.omit(pollen)
range(pollen$difference)
## [1] 0.002715 1.565420
# add queenright original colony column
qro <- read_csv("qro.csv")
qro$colony <- as.factor(qro$colony)
qro$qro <- as.factor(qro$qro)
pollen <- merge(pollen, qro, by.x = "colony")
pollen$pid <- as.factor(pollen$pid)
pollen$dose_consumed <- as.numeric(pollen$dose_consumed)
pollen$whole_dif <- as.numeric(pollen$difference)
wd.1 <- lm(difference ~ treatment, data = pollen)
summary(wd.1)
##
## Call:
## lm(formula = difference ~ treatment, data = pollen)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4853 -0.2416 -0.1504 0.1576 1.0638
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.429541 0.024170 17.772 <2e-16 ***
## treatment2 0.072099 0.034886 2.067 0.0390 *
## treatment3 0.078078 0.034406 2.269 0.0235 *
## treatment4 0.058490 0.034988 1.672 0.0949 .
## treatment5 0.004982 0.035040 0.142 0.8870
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3375 on 915 degrees of freedom
## Multiple R-squared: 0.009927, Adjusted R-squared: 0.005599
## F-statistic: 2.294 on 4 and 915 DF, p-value: 0.05773
wd.emm <- emmeans(wd.1, "treatment")
summary(wd.emm)
## treatment emmean SE df lower.CL upper.CL
## 1 0.430 0.0242 915 0.382 0.477
## 2 0.502 0.0252 915 0.452 0.551
## 3 0.508 0.0245 915 0.460 0.556
## 4 0.488 0.0253 915 0.438 0.538
## 5 0.435 0.0254 915 0.385 0.484
##
## Confidence level used: 0.95
wd.summary <- pollen %>%
group_by(colony) %>%
summarize(whole.mean = mean(difference),
mean.dose = mean(dose_consumed))
as.data.frame(wd.summary) # this is the data frame I will merge with subsequent data frames to contain average pollen consumption per colony as a new column
## colony whole.mean mean.dose
## 1 1.11R2 0.2829509 0.00000
## 2 1.12R2 0.1697964 0.00000
## 3 1.1R2 0.5213458 0.00000
## 4 1.2R2 0.3374200 0.00000
## 5 1.3R2 0.4512891 0.00000
## 6 1.4R2 0.6063016 0.00000
## 7 1.5R2 0.7079516 0.00000
## 8 1.7R2 0.7400381 0.00000
## 9 1.9R2 0.2240081 0.00000
## 10 2.11R2 0.4178270 35.99958
## 11 2.12R2 0.4035568 35.62453
## 12 2.1R2 0.6101589 55.13368
## 13 2.2R2 0.5109234 47.65174
## 14 2.3R2 0.3280036 24.29676
## 15 2.4R2 0.3881394 32.86886
## 16 2.5R2 0.7194915 73.77696
## 17 2.7R2 0.5299685 46.45851
## 18 2.9R2 0.5857152 52.49618
## 19 3.11R2 0.4216410 357.65779
## 20 3.12R2 0.3390993 281.21803
## 21 3.1R2 0.3711948 307.88606
## 22 3.2R2 0.3461010 310.28287
## 23 3.3R2 0.8465806 919.09823
## 24 3.4R2 0.4120433 406.20875
## 25 3.5R2 0.8906211 969.89690
## 26 3.7R2 0.6266680 720.47811
## 27 3.9R2 0.4409511 417.98849
## 28 4.11R2 0.3456011 3697.92253
## 29 4.12R2 0.2496295 2085.61694
## 30 4.1R2 0.7074755 8936.65823
## 31 4.2R2 0.3871742 4380.60484
## 32 4.3R2 0.5800074 6846.91060
## 33 4.4R2 0.8356247 10230.80238
## 34 4.5R2 0.2335967 1757.69297
## 35 4.7R2 0.6237400 7030.06345
## 36 4.9R2 0.5322950 5682.26011
## 37 5.11R2 0.4113960 43070.09972
## 38 5.12R2 0.2077741 15383.81515
## 39 5.1R2 0.3782286 37367.48316
## 40 5.2R2 0.4912468 52985.85612
## 41 5.3R2 0.2128778 16364.60071
## 42 5.4R2 0.4563045 48624.01684
## 43 5.5R2 0.7095479 82144.93157
## 44 5.7R2 0.6113189 67415.74103
## 45 5.9R2 0.4188290 42601.27230
mortality <- read_csv("surviving workers per colony.csv")
mortality$colony <- as.factor(mortality$colony)
brood <- read_csv("brood.csv", col_types = cols(round = col_factor(levels = c("1",
"2")), treatment = col_factor(levels = c("1",
"2", "3", "4", "5")), replicate = col_factor(levels = c("1",
"2", "3", "4", "5", "7", "9", "11", "12"))))
brood$colony <- as.factor(brood$colony)
brood <- subset(brood, brood$round != 1)
brood <- merge(wd.summary, brood, by.x = "colony")
brood <- merge(brood, mortality, by.x = "colony")
brood.sub <- subset(brood, select = c(colony, duration))
drones <- read_csv("drones_rf.csv", col_types = cols(round = col_factor(levels = c("1","2")), treatment = col_factor(levels = c("1","2", "3", "4", "5")), notes = col_skip(), colony_start = col_skip(), colony_count = col_skip(), alive = col_skip(), emerge_date = col_skip()))
#this data set is for drone emerge times and health metrics
drones$order_on_sheet <- as.factor(drones$order_on_sheet)
drones$replicate <- as.factor(drones$replicate)
drones$colony <- as.factor(drones$colony)
drones$id <- as.factor(drones$id)
drones$relative_fat <- as.double(drones$relative_fat)
drones$radial <- as.double(drones$radial)
drones$`alive?` <- as.double(drones$`alive?`)
drf.pollen <- merge(wd.summary, drones, by.x = "colony") # this is a new data frame with average pollen consumption data per colony
drf.pollen$alive <- as.logical(drf.pollen$`alive?`)
drone.sum <- drf.pollen %>%
group_by(colony) %>%
summarise(count.drone = length(id))
drone.sum
## # A tibble: 40 × 2
## colony count.drone
## <fct> <int>
## 1 1.11R2 4
## 2 1.1R2 6
## 3 1.2R2 12
## 4 1.3R2 11
## 5 1.4R2 16
## 6 1.5R2 21
## 7 1.7R2 12
## 8 2.11R2 9
## 9 2.12R2 5
## 10 2.1R2 11
## # … with 30 more rows
drone.sum <- as.data.frame(drone.sum)
qro <- read_csv("qro.csv")
qro$colony <- as.factor(qro$colony)
qro$qro <- as.factor(qro$qro)
drf.pollen <- merge(drf.pollen, qro, by.x = "colony")
drf.pollen<- merge(drf.pollen, brood.sub, by.x = "colony")
range(drf.pollen$dry_weight)
## [1] 0.0166 0.0826
drf.pollen.dry <- drf.pollen[-c(131, 160, 177), ] #remove outlier that was covered in honey and dead bees
range(drf.pollen$dry_weight)
## [1] 0.0166 0.0826
drf.pollen.dry <- subset(drf.pollen.dry, select = c(dry_weight, treatment, whole.mean, mean.dose, workers_alive, alive, duration, qro, colony))
drf.pollen.dry <- na.omit(drf.pollen.dry)
ggplot(drf.pollen.dry, aes(x=dry_weight, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.0025,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Dry Weight") +
labs(y = "Count", x = "Weight (g) ")
shapiro.test(drf.pollen.dry$dry_weight)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen.dry$dry_weight
## W = 0.99416, p-value = 0.1154
dry.model <- lm(dry_weight~ treatment + whole.mean + mean.dose + workers_alive + duration + qro, data = drf.pollen)
ols_vif_tol(dry.model)
## Variables Tolerance VIF
## 1 treatment2 0.50260697 1.989626
## 2 treatment3 0.59719170 1.674504
## 3 treatment4 0.40461328 2.471496
## 4 treatment5 0.07461722 13.401732
## 5 whole.mean 0.50173020 1.993103
## 6 mean.dose 0.09232082 10.831793
## 7 workers_alive 0.49903846 2.003854
## 8 duration 0.69478708 1.439290
## 9 qroB3 0.78901120 1.267409
## 10 qroB4 0.55986224 1.786154
## 11 qroB5 0.50314078 1.987515
vif(dry.model)
## GVIF Df GVIF^(1/(2*Df))
## treatment 19.152334 4 1.446364
## whole.mean 1.993103 1 1.411773
## mean.dose 10.831793 1 3.291169
## workers_alive 2.003854 1 1.415575
## duration 1.439290 1 1.199704
## qro 3.084743 3 1.206526
dry.mod1 <- update(dry.model, .~. - mean.dose)
vif(dry.mod1)
## GVIF Df GVIF^(1/(2*Df))
## treatment 2.032114 4 1.092681
## whole.mean 1.691258 1 1.300484
## workers_alive 1.982161 1 1.407892
## duration 1.386150 1 1.177349
## qro 2.905325 3 1.194536
dry.mod2 <- update(dry.model, .~. - whole.mean)
vif(dry.mod2)
## GVIF Df GVIF^(1/(2*Df))
## treatment 13.450555 4 1.383861
## mean.dose 9.191372 1 3.031728
## workers_alive 1.985847 1 1.409201
## duration 1.288983 1 1.135334
## qro 2.494860 3 1.164594
dry.mod3 <- update(dry.model, .~. - treatment)
vif(dry.mod3)
## GVIF Df GVIF^(1/(2*Df))
## whole.mean 1.399743 1 1.183107
## mean.dose 1.149282 1 1.072046
## workers_alive 1.808411 1 1.344772
## duration 1.137360 1 1.066471
## qro 2.347073 3 1.152801
# This shows that treatment and mean.dose are highly correlated
pair.dry.df <- subset(drf.pollen, select = c(dry_weight, treatment, whole.mean, mean.dose, workers_alive, alive, duration, qro)) #make a data frame containing only the explanatory variables in the model
ggpairs(pair.dry.df)
dry.int <- lmer(dry_weight~ treatment*whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.dry)
dry.g1 <- lmer(dry_weight~ treatment + whole.mean + mean.dose + workers_alive + duration + qro + (1|colony), data = drf.pollen.dry)
dry.g2 <- lmer(dry_weight~ treatment + whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.dry)
#this is the model we are keeping
dry.g3 <- lmer(dry_weight~ mean.dose + whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.dry)
anova(dry.int, dry.g1, dry.g2) #AIC is lower for dry.g2 and Chisq value is not sig.
## Data: drf.pollen.dry
## Models:
## dry.g2: dry_weight ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## dry.g1: dry_weight ~ treatment + whole.mean + mean.dose + workers_alive + duration + qro + (1 | colony)
## dry.int: dry_weight ~ treatment * whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## dry.g2 13 -2815.0 -2762.7 1420.5 -2841.0
## dry.g1 14 -2813.1 -2756.7 1420.5 -2841.1 0.1073 1 0.7433
## dry.int 17 -2809.0 -2740.6 1421.5 -2843.0 1.9746 3 0.5777
anova(dry.g1, dry.g3) #mean.dose is not useful
## Data: drf.pollen.dry
## Models:
## dry.g3: dry_weight ~ mean.dose + whole.mean + workers_alive + duration + qro + (1 | colony)
## dry.g1: dry_weight ~ treatment + whole.mean + mean.dose + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## dry.g3 10 -2797.0 -2756.7 1408.5 -2817.0
## dry.g1 14 -2813.1 -2756.7 1420.5 -2841.1 24.091 4 7.657e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(dry.g2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: dry_weight ~ treatment + whole.mean + workers_alive + duration +
## qro + (1 | colony)
## Data: drf.pollen.dry
##
## REML criterion at convergence: -2704.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.98339 -0.63521 -0.04443 0.67352 3.10627
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 2.555e-06 0.001598
## Residual 6.036e-05 0.007769
## Number of obs: 413, groups: colony, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.815e-02 4.270e-03 11.275
## treatment2 -2.710e-03 1.683e-03 -1.610
## treatment3 -7.079e-03 1.672e-03 -4.233
## treatment4 -6.735e-04 1.610e-03 -0.418
## treatment5 -1.115e-04 1.673e-03 -0.067
## whole.mean 3.993e-03 3.817e-03 1.046
## workers_alive -8.929e-04 6.538e-04 -1.366
## duration -1.095e-04 7.134e-05 -1.535
## qroB3 4.339e-04 1.723e-03 0.252
## qroB4 3.319e-03 1.782e-03 1.862
## qroB5 1.601e-03 1.467e-03 1.091
##
## Correlation of Fixed Effects:
## (Intr) trtmn2 trtmn3 trtmn4 trtmn5 whl.mn wrkrs_ duratn qroB3
## treatment2 0.058
## treatment3 0.094 0.487
## treatment4 0.167 0.520 0.534
## treatment5 0.121 0.565 0.499 0.548
## whole.mean -0.217 0.131 -0.083 -0.147 0.177
## workers_alv -0.709 -0.103 -0.183 -0.278 -0.316 -0.106
## duration -0.555 -0.390 -0.168 -0.169 -0.291 -0.278 0.129
## qroB3 0.103 0.197 0.062 0.092 0.251 0.049 -0.100 -0.252
## qroB4 -0.016 0.065 0.048 0.219 -0.025 -0.487 0.220 -0.011 0.166
## qroB5 -0.434 0.117 -0.042 -0.097 -0.046 0.035 0.583 -0.131 0.200
## qroB4
## treatment2
## treatment3
## treatment4
## treatment5
## whole.mean
## workers_alv
## duration
## qroB3
## qroB4
## qroB5 0.308
drop1(dry.g2, test = "Chisq")
## Single term deletions
##
## Model:
## dry_weight ~ treatment + whole.mean + workers_alive + duration +
## qro + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> -2815.0
## treatment 4 -2796.2 26.7206 2.264e-05 ***
## whole.mean 1 -2815.9 1.0772 0.29932
## workers_alive 1 -2813.7 3.2311 0.07225 .
## duration 1 -2813.8 3.1801 0.07454 .
## qro 3 -2814.8 6.1222 0.10581
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dry.g2.update <- update(dry.g2, .~. -whole.mean)
anova(dry.g2.update, dry.g2)
## Data: drf.pollen.dry
## Models:
## dry.g2.update: dry_weight ~ treatment + workers_alive + duration + qro + (1 | colony)
## dry.g2: dry_weight ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## dry.g2.update 12 -2815.9 -2767.6 1419.9 -2839.9
## dry.g2 13 -2815.0 -2762.7 1420.5 -2841.0 1.0772 1 0.2993
drop1(dry.g2.update, test = "Chisq")
## Single term deletions
##
## Model:
## dry_weight ~ treatment + workers_alive + duration + qro + (1 |
## colony)
## npar AIC LRT Pr(Chi)
## <none> -2815.9
## treatment 4 -2797.8 26.0848 3.042e-05 ***
## workers_alive 1 -2815.1 2.7472 0.09743 .
## duration 1 -2815.5 2.4136 0.12028
## qro 3 -2811.1 10.7628 0.01308 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dry.g2.update2 <- update(dry.g2.update, .~. -duration)
anova(dry.g2.update2, dry.g2.update)
## Data: drf.pollen.dry
## Models:
## dry.g2.update2: dry_weight ~ treatment + workers_alive + qro + (1 | colony)
## dry.g2.update: dry_weight ~ treatment + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## dry.g2.update2 11 -2815.5 -2771.2 1418.7 -2837.5
## dry.g2.update 12 -2815.9 -2767.6 1419.9 -2839.9 2.4136 1 0.1203
drop1(dry.g2.update2, test = "Chisq")
## Single term deletions
##
## Model:
## dry_weight ~ treatment + workers_alive + qro + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> -2815.5
## treatment 4 -2797.1 26.4120 2.613e-05 ***
## workers_alive 1 -2815.3 2.1761 0.14017
## qro 3 -2812.2 9.2947 0.02562 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dry.g2.update3 <- update(dry.g2.update2, .~. -workers_alive)
anova(dry.g2.update2, dry.g2.update3)
## Data: drf.pollen.dry
## Models:
## dry.g2.update3: dry_weight ~ treatment + qro + (1 | colony)
## dry.g2.update2: dry_weight ~ treatment + workers_alive + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## dry.g2.update3 10 -2815.3 -2775.1 1417.6 -2835.3
## dry.g2.update2 11 -2815.5 -2771.2 1418.7 -2837.5 2.1761 1 0.1402
drop1(dry.g2.update3, test = "Chisq")
## Single term deletions
##
## Model:
## dry_weight ~ treatment + qro + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> -2815.3
## treatment 4 -2798.4 24.892 5.288e-05 ***
## qro 3 -2808.7 12.605 0.005575 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#New model we are keeping for dry weight
dry.g2.update3
## Linear mixed model fit by REML ['lmerMod']
## Formula: dry_weight ~ treatment + qro + (1 | colony)
## Data: drf.pollen.dry
## REML criterion at convergence: -2740.253
## Random effects:
## Groups Name Std.Dev.
## colony (Intercept) 0.001607
## Residual 0.007778
## Number of obs: 413, groups: colony, 39
## Fixed Effects:
## (Intercept) treatment2 treatment3 treatment4 treatment5 qroB3
## 0.0423110 -0.0038391 -0.0076907 -0.0013794 -0.0014854 -0.0003439
## qroB4 qroB5
## 0.0042521 0.0023159
summary(dry.g2.update3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: dry_weight ~ treatment + qro + (1 | colony)
## Data: drf.pollen.dry
##
## REML criterion at convergence: -2740.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.05397 -0.62226 -0.00129 0.67859 3.13654
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 2.582e-06 0.001607
## Residual 6.050e-05 0.007778
## Number of obs: 413, groups: colony, 39
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.0423110 0.0012739 33.214
## treatment2 -0.0038391 0.0015507 -2.476
## treatment3 -0.0076907 0.0016112 -4.773
## treatment4 -0.0013794 0.0014933 -0.924
## treatment5 -0.0014854 0.0015285 -0.972
## qroB3 -0.0003439 0.0016672 -0.206
## qroB4 0.0042521 0.0015001 2.835
## qroB5 0.0023159 0.0011541 2.007
##
## Correlation of Fixed Effects:
## (Intr) trtmn2 trtmn3 trtmn4 trtmn5 qroB3 qroB4
## treatment2 -0.675
## treatment3 -0.586 0.471
## treatment4 -0.688 0.523 0.485
## treatment5 -0.689 0.519 0.476 0.527
## qroB3 -0.323 0.108 0.005 0.029 0.182
## qroB4 -0.377 0.107 0.012 0.212 0.096 0.204
## qroB5 -0.428 0.135 0.054 0.063 0.114 0.277 0.292
plot(resid(dry.g2.update3)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(dry.g2.update3));qqline(resid(dry.g2.update3))
Anova(dry.g2.update3)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: dry_weight
## Chisq Df Pr(>Chisq)
## treatment 27.733 4 1.412e-05 ***
## qro 10.802 3 0.01284 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dry.emm.update <- emmeans(dry.g2.update3, "treatment")
pairs(dry.emm.update)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.003839 0.00156 26.2 2.459 0.1312
## treatment1 - treatment3 0.007691 0.00163 26.2 4.733 0.0006
## treatment1 - treatment4 0.001379 0.00150 21.0 0.917 0.8872
## treatment1 - treatment5 0.001485 0.00154 23.8 0.967 0.8673
## treatment2 - treatment3 0.003852 0.00164 29.3 2.346 0.1592
## treatment2 - treatment4 -0.002460 0.00150 23.4 -1.640 0.4881
## treatment2 - treatment5 -0.002354 0.00152 26.3 -1.552 0.5399
## treatment3 - treatment4 -0.006311 0.00159 24.0 -3.957 0.0048
## treatment3 - treatment5 -0.006205 0.00162 26.5 -3.823 0.0060
## treatment4 - treatment5 0.000106 0.00148 21.2 0.072 1.0000
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(dry.emm.update, comparisons = TRUE)
dry.sum <- drf.pollen %>%
group_by(treatment) %>%
summarise(dry.m = mean(dry_weight),
dry.sd = sd(dry_weight),
dryn = length(dry_weight)) %>%
mutate(sedry = dry.sd/sqrt(dryn))
dry.sum
## # A tibble: 5 × 5
## treatment dry.m dry.sd dryn sedry
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 0.0444 0.00816 82 0.000901
## 2 2 0.0395 0.00834 76 0.000957
## 3 3 0.0370 0.0104 66 0.00129
## 4 4 0.0416 0.00831 107 0.000804
## 5 5 0.0416 0.00731 90 0.000771
dryanova_summary <- summary(dry.g2.update3)
drytuk.means <- emmeans(object = dry.g2.update3,
specs = "treatment",
adjust = "Tukey",
type = "response")
drytuk.means
## treatment emmean SE df lower.CL upper.CL
## 1 0.0439 0.00110 22.3 0.0408 0.0470
## 2 0.0400 0.00115 29.3 0.0369 0.0432
## 3 0.0362 0.00122 29.3 0.0328 0.0395
## 4 0.0425 0.00108 17.5 0.0394 0.0456
## 5 0.0424 0.00114 23.9 0.0392 0.0456
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
dry.cld.model <- cld(object = drytuk.means,
adjust = "Tukey",
Letters = letters,
alpha = 0.05)
dry.cld.model
## treatment emmean SE df lower.CL upper.CL .group
## 3 0.0362 0.00122 29.3 0.0328 0.0395 a
## 2 0.0400 0.00115 29.3 0.0369 0.0432 ab
## 5 0.0424 0.00114 23.9 0.0392 0.0456 b
## 4 0.0425 0.00108 17.5 0.0394 0.0456 b
## 1 0.0439 0.00110 22.3 0.0408 0.0470 b
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## P value adjustment: tukey method for comparing a family of 5 estimates
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
drytuk.treatment <- as.data.frame(dry.cld.model)
drytuk.treatment
## treatment emmean SE df lower.CL upper.CL .group
## 3 3 0.03617637 0.001219109 29.33451 0.03282901 0.03952374 a
## 2 2 0.04002794 0.001154746 29.33317 0.03685730 0.04319859 ab
## 5 5 0.04238163 0.001141717 23.87657 0.03919697 0.04556630 b
## 4 4 0.04248765 0.001075162 17.51284 0.03939309 0.04558220 b
## 1 1 0.04386704 0.001099481 22.29312 0.04078151 0.04695256 b
dry_max <- drf.pollen %>%
group_by(treatment) %>%
summarize(maxdry = max(mean(dry_weight)))
dry_for_plotting <- full_join(drytuk.treatment, dry_max,
by="treatment")
dp <- ggplot(data = dry.sum, aes(x = treatment, y = dry.m, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(0.03, 0.046)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = dry.m - sedry,
ymax = dry.m + sedry),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Drone Dry Weight (g)",) +
ggtitle("Average Drone Dry Weight (g) per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 30)
dp <- dp +
annotate(geom = "text",
x = 2.5, y = 0.045,
label = "P < 0.001",
size = 8) +
annotate(geom = "text",
x = c(3, 2, 4, 5, 1),
y = dry_for_plotting$maxdry + 0.002,
label = c("a", "ab", "b", "b", "b"),
size = 8) +
theme(legend.position = "none")
dp
A <- Anova(dry.g2.update3)
A <- as.data.frame(A)
A <- setDT(A)
A
## Chisq Df Pr(>Chisq)
## 1: 27.73338 4 1.412494e-05
## 2: 10.80220 3 1.284498e-02
dry_emm <- emmeans(dry.g2.update3, pairwise ~ treatment)
dry_contrasts <- as.data.frame(dry_emm$contrasts)
dry_means <- as.data.frame(dry_emm$emmeans)
dry_means <- setDT(dry_means)
dry_means
## treatment emmean SE df lower.CL upper.CL
## 1: 1 0.04386704 0.001099481 22.29312 0.04158859 0.04614548
## 2: 2 0.04002794 0.001154746 29.33317 0.03766739 0.04238850
## 3: 3 0.03617637 0.001219109 29.33451 0.03368425 0.03866850
## 4: 4 0.04248765 0.001075162 17.51284 0.04022430 0.04475099
## 5: 5 0.04238163 0.001141717 23.87657 0.04002460 0.04473867
dry_contrasts <- setDT(dry_contrasts)
dry_contrasts
## contrast estimate SE df t.ratio
## 1: treatment1 - treatment2 0.0038390929 0.001560937 26.16772 2.45947931
## 2: treatment1 - treatment3 0.0076906630 0.001625012 26.24382 4.73268043
## 3: treatment1 - treatment4 0.0013793901 0.001504353 20.99027 0.91693256
## 4: treatment1 - treatment5 0.0014854044 0.001536801 23.74828 0.96655602
## 5: treatment2 - treatment3 0.0038515701 0.001641710 29.32493 2.34607193
## 6: treatment2 - treatment4 -0.0024597029 0.001499795 23.37671 -1.64002620
## 7: treatment2 - treatment5 -0.0023536885 0.001516999 26.30894 -1.55154282
## 8: treatment3 - treatment4 -0.0063112729 0.001594873 24.03561 -3.95722601
## 9: treatment3 - treatment5 -0.0062052586 0.001623293 26.49833 -3.82263699
## 10: treatment4 - treatment5 0.0001060143 0.001480534 21.16046 0.07160545
## p.value
## 1: 0.1312276191
## 2: 0.0005895033
## 3: 0.8871812677
## 4: 0.8673230324
## 5: 0.1592058361
## 6: 0.4881304744
## 7: 0.5398742384
## 8: 0.0048269361
## 9: 0.0059675633
## 10: 0.9999934994
dry.sum <- setDT(dry.sum)
dry.sum
## treatment dry.m dry.sd dryn sedry
## 1: 1 0.04442317 0.008157861 82 0.0009008850
## 2: 2 0.03948684 0.008341372 76 0.0009568210
## 3: 3 0.03697606 0.010442439 66 0.0012853754
## 4: 4 0.04158318 0.008314269 107 0.0008037707
## 5: 5 0.04163189 0.007314701 90 0.0007710372
drf.pollen.rf <- subset(drf.pollen, select = c(relative_fat, treatment, whole.mean, workers_alive, alive, duration, qro, colony))
drf.pollen.rf <- na.omit(drf.pollen.rf)
shapiro.test(drf.pollen.rf$relative_fat)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen.rf$relative_fat
## W = 0.8011, p-value < 2.2e-16
range(drf.pollen.rf$relative_fat)
## [1] 0.0002 0.0072
ggplot(drf.pollen.rf, aes(x=relative_fat, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.0002,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Relative Fat") +
labs(y = "Count", x = "Relative Fat(g) ")
drf.pollen.rf$logrf <- log(drf.pollen.rf$relative_fat)
shapiro.test(drf.pollen.rf$logrf)
##
## Shapiro-Wilk normality test
##
## data: drf.pollen.rf$logrf
## W = 0.93266, p-value = 4.097e-12
range(drf.pollen.rf$logrf)
## [1] -8.517193 -4.933674
ggplot(drf.pollen.rf, aes(x=logrf, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.1,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Relative Fat (log transformed)") +
labs(y = "Count", x = "log(Relative Fat(g)) ")
rf.model <- lm(relative_fat~ treatment + whole.mean + workers_alive + duration + qro, data = drf.pollen.rf)
vif(rf.model)
## GVIF Df GVIF^(1/(2*Df))
## treatment 1.964734 4 1.088085
## whole.mean 1.769399 1 1.330188
## workers_alive 2.024742 1 1.422934
## duration 1.394242 1 1.180780
## qro 3.217236 3 1.215012
rf.int <- lmer(relative_fat~ treatment*whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.rf)
#this is the model we will keep
rf.g1 <- lmer(relative_fat~ treatment + whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.rf)
rf.g3 <- glm(logrf ~ whole.mean, data = drf.pollen.rf)
qqnorm(resid(rf.g3));qqline(resid(rf.g3))
rf.int.2 <- lmer(logrf~ treatment*whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.rf)
rf.g2 <- lmer(logrf~ treatment + whole.mean + workers_alive + duration + qro + (1|colony), data = drf.pollen.rf)
anova(rf.int, rf.g1)#rf.int marginally better
## Data: drf.pollen.rf
## Models:
## rf.g1: relative_fat ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## rf.int: relative_fat ~ treatment * whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rf.g1 13 -4326.8 -4275.5 2176.4 -4352.8
## rf.int 17 -4327.9 -4260.8 2180.9 -4361.9 9.0484 4 0.0599 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(rf.int.2, rf.g2) #rf.g2 better
## Data: drf.pollen.rf
## Models:
## rf.g2: logrf ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## rf.int.2: logrf ~ treatment * whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rf.g2 13 452.60 503.89 -213.30 426.60
## rf.int.2 17 456.85 523.92 -211.42 422.85 3.7568 4 0.4399
AIC(rf.int, rf.g1)
## df AIC
## rf.int 17 -4092.649
## rf.g1 13 -4141.824
AIC(rf.int.2, rf.g2)
## df AIC
## rf.int.2 17 501.9528
## rf.g2 13 498.7718
drop1(rf.int, test = "Chisq") # keep all
## Single term deletions
##
## Model:
## relative_fat ~ treatment * whole.mean + workers_alive + duration +
## qro + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> -4327.9
## workers_alive 1 -4329.6 0.2639 0.60744
## duration 1 -4326.4 3.4647 0.06269 .
## qro 3 -4326.7 7.1391 0.06759 .
## treatment:whole.mean 4 -4326.8 9.0484 0.05990 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emtrends(rf.int, pairwise ~ treatment, var = "whole.mean")
## $emtrends
## treatment whole.mean.trend SE df lower.CL upper.CL
## 1 0.000793 0.000765 20.9 -0.000798 0.00238
## 2 -0.000761 0.000982 25.5 -0.002781 0.00126
## 3 0.000174 0.000678 27.2 -0.001215 0.00156
## 4 0.000694 0.000666 15.2 -0.000724 0.00211
## 5 0.002362 0.000892 14.9 0.000460 0.00426
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 1.55e-03 0.001141 22.1 1.361 0.6574
## treatment1 - treatment3 6.19e-04 0.000895 19.3 0.691 0.9561
## treatment1 - treatment4 9.87e-05 0.001056 18.7 0.093 1.0000
## treatment1 - treatment5 -1.57e-03 0.001098 15.3 -1.429 0.6195
## treatment2 - treatment3 -9.35e-04 0.001131 25.1 -0.827 0.9198
## treatment2 - treatment4 -1.46e-03 0.001196 21.5 -1.217 0.7420
## treatment2 - treatment5 -3.12e-03 0.001218 19.7 -2.564 0.1164
## treatment3 - treatment4 -5.20e-04 0.000979 21.1 -0.531 0.9831
## treatment3 - treatment5 -2.19e-03 0.001035 17.4 -2.113 0.2584
## treatment4 - treatment5 -1.67e-03 0.001104 15.5 -1.511 0.5711
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(emtrends(rf.int, pairwise ~ treatment, var = "whole.mean"))
emmip(rf.int, treatment ~ whole.mean, cov.reduce = range)
emmeans(rf.int, pairwise ~ treatment | whole.mean)
## $emmeans
## whole.mean = 0.57:
## treatment emmean SE df lower.CL upper.CL
## 1 0.00190 0.000110 15.8 0.00166 0.00213
## 2 0.00169 0.000120 18.6 0.00144 0.00195
## 3 0.00139 0.000124 30.9 0.00114 0.00164
## 4 0.00173 0.000110 13.8 0.00149 0.00196
## 5 0.00224 0.000118 12.1 0.00199 0.00250
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## whole.mean = 0.57:
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 2.02e-04 0.000163 17.7 1.239 0.7294
## treatment1 - treatment3 5.07e-04 0.000164 20.4 3.098 0.0397
## treatment1 - treatment4 1.71e-04 0.000156 17.0 1.096 0.8060
## treatment1 - treatment5 -3.47e-04 0.000164 13.9 -2.118 0.2664
## treatment2 - treatment3 3.05e-04 0.000171 26.8 1.788 0.4008
## treatment2 - treatment4 -3.03e-05 0.000158 15.7 -0.192 0.9997
## treatment2 - treatment5 -5.49e-04 0.000160 14.4 -3.439 0.0269
## treatment3 - treatment4 -3.35e-04 0.000156 20.4 -2.150 0.2379
## treatment3 - treatment5 -8.54e-04 0.000163 17.8 -5.226 0.0005
## treatment4 - treatment5 -5.18e-04 0.000154 11.8 -3.358 0.0381
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
Let’s try the log transformed data instead
Drop1
drop1(rf.g2, test = "Chisq")
## Single term deletions
##
## Model:
## logrf ~ treatment + whole.mean + workers_alive + duration + qro +
## (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> 452.60
## treatment 4 468.05 23.4461 0.0001031 ***
## whole.mean 1 452.15 1.5506 0.2130479
## workers_alive 1 450.63 0.0254 0.8734816
## duration 1 458.28 7.6762 0.0055954 **
## qro 3 451.79 5.1909 0.1583432
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rf.update1 <- update(rf.g2, .~. -workers_alive)
anova(rf.g2, rf.update1)
## Data: drf.pollen.rf
## Models:
## rf.update1: logrf ~ treatment + whole.mean + duration + qro + (1 | colony)
## rf.g2: logrf ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rf.update1 12 450.63 497.97 -213.31 426.63
## rf.g2 13 452.60 503.89 -213.30 426.60 0.0254 1 0.8735
drop1(rf.update1, test = "Chisq")
## Single term deletions
##
## Model:
## logrf ~ treatment + whole.mean + duration + qro + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> 450.63
## treatment 4 466.09 23.4639 0.0001023 ***
## whole.mean 1 450.16 1.5285 0.2163435
## duration 1 456.35 7.7158 0.0054739 **
## qro 3 451.01 6.3827 0.0944052 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rf.update2 <- update(rf.update1, .~. -whole.mean)
anova(rf.update2, rf.update1)
## Data: drf.pollen.rf
## Models:
## rf.update2: logrf ~ treatment + duration + qro + (1 | colony)
## rf.update1: logrf ~ treatment + whole.mean + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rf.update2 11 450.16 493.56 -214.08 428.16
## rf.update1 12 450.63 497.97 -213.31 426.63 1.5285 1 0.2163
drop1(rf.update2, test = "Chisq")
## Single term deletions
##
## Model:
## logrf ~ treatment + duration + qro + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> 450.16
## treatment 4 464.13 21.9708 0.0002031 ***
## duration 1 454.62 6.4629 0.0110151 *
## qro 3 454.78 10.6269 0.0139246 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairs(emmeans(rf.update2, "treatment"))
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.0257 0.0909 26.9 0.283 0.9985
## treatment1 - treatment3 0.2772 0.0904 24.9 3.068 0.0376
## treatment1 - treatment4 0.0496 0.0852 22.3 0.582 0.9763
## treatment1 - treatment5 -0.1308 0.0862 23.3 -1.517 0.5624
## treatment2 - treatment3 0.2515 0.0899 29.0 2.797 0.0636
## treatment2 - treatment4 0.0239 0.0827 21.7 0.289 0.9983
## treatment2 - treatment5 -0.1564 0.0819 24.0 -1.910 0.3393
## treatment3 - treatment4 -0.2276 0.0878 23.9 -2.594 0.1036
## treatment3 - treatment5 -0.4080 0.0876 25.0 -4.657 0.0008
## treatment4 - treatment5 -0.1804 0.0805 19.5 -2.242 0.2063
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(resid(rf.int)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(rf.int));qqline(resid(rf.int)) #doesn't look great
plot(rf.int)
Updated model - check model fit
qqnorm(resid(rf.update2));qqline(resid(rf.update2))
plot(drf.pollen.rf$treatment, drf.pollen.rf$relative_fat)
plot(drf.pollen.rf$relative_fat)
rf_sub <- subset(drf.pollen.rf, relative_fat<0.004) #Difference of 13 bees
ggplot(rf_sub, aes(x=relative_fat, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.0002,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Relative Fat - Manual subset data") +
labs(y = "Count", x = "Relative Fat(g) ")
shapiro.test(rf_sub$relative_fat)
##
## Shapiro-Wilk normality test
##
## data: rf_sub$relative_fat
## W = 0.97686, p-value = 1.232e-05
boxplot.stats(drf.pollen.rf$relative_fat)$out #don't care for this
## [1] 0.0002 0.0050 0.0040 0.0042 0.0071 0.0034 0.0032 0.0038 0.0036 0.0035
## [11] 0.0003 0.0002 0.0057 0.0032 0.0041 0.0072 0.0060 0.0042 0.0032 0.0049
## [21] 0.0056 0.0046 0.0041
lower_bound <- quantile(drf.pollen.rf$relative_fat, 0.025)
lower_bound
## 2.5%
## 5e-04
upper_bound <- quantile(drf.pollen.rf$relative_fat, 0.975)
upper_bound #this matches the subset data set I made getting rid of all values greater than 0.004
## 97.5%
## 0.0041475
outlier_ind <- which(drf.pollen.rf$relative_fat < lower_bound | drf.pollen.rf$relative_fat > upper_bound)
outlier_ind #difference of 16 bees
## [1] 37 38 47 48 122 164 181 189 223 264 332 339 348 350 351 352
drf.pollen.rf[outlier_ind,]
## relative_fat treatment whole.mean workers_alive alive duration qro colony
## 38 0.0002 1 0.6063016 1 TRUE 44 B5 1.4R2
## 39 0.0050 1 0.6063016 1 TRUE 44 B5 1.4R2
## 51 0.0042 1 0.7079516 4 TRUE 37 B4 1.5R2
## 52 0.0071 1 0.7079516 4 TRUE 37 B4 1.5R2
## 131 0.0004 2 0.3280036 5 FALSE 41 B3 2.3R2
## 174 0.0003 3 0.8465806 5 TRUE 62 B3 3.3R2
## 194 0.0004 3 0.8906211 4 TRUE 58 B4 3.5R2
## 202 0.0002 3 0.8906211 4 TRUE 58 B4 3.5R2
## 241 0.0057 4 0.7074755 4 TRUE 41 B1 4.1R2
## 292 0.0004 4 0.8356247 5 TRUE 46 B5 4.4R2
## 369 0.0072 5 0.4563045 5 TRUE 51 B5 5.4R2
## 376 0.0060 5 0.7095479 5 TRUE 43 B4 5.5R2
## 386 0.0042 5 0.7095479 5 TRUE 43 B4 5.5R2
## 388 0.0049 5 0.7095479 5 TRUE 43 B4 5.5R2
## 389 0.0056 5 0.7095479 5 TRUE 43 B4 5.5R2
## 390 0.0046 5 0.6113189 5 TRUE 41 B1 5.7R2
## logrf
## 38 -8.517193
## 39 -5.298317
## 51 -5.472671
## 52 -4.947660
## 131 -7.824046
## 174 -8.111728
## 194 -7.824046
## 202 -8.517193
## 241 -5.167289
## 292 -7.824046
## 369 -4.933674
## 376 -5.115996
## 386 -5.472671
## 388 -5.318520
## 389 -5.184989
## 390 -5.381699
plot(outlier_ind)
df2 <- drf.pollen.rf[-c(37, 38,47 , 48, 122, 164, 181 ,189, 223 ,264, 332, 339, 348 ,350 ,351, 352),]
ggplot(df2, aes(x=relative_fat, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.0002,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Relative Fat - Outliers removed") +
labs(y = "Count", x = "Relative Fat(g) ")
shapiro.test(df2$relative_fat) # This is worse than just cutting off the tail
##
## Shapiro-Wilk normality test
##
## data: df2$relative_fat
## W = 0.94807, p-value = 4.83e-10
rf.int.sub <- lmer(relative_fat~ treatment*whole.mean + workers_alive + duration + qro + (1|colony), data = rf_sub)
rf.g1.sub <- lmer(relative_fat~ treatment + whole.mean + workers_alive + duration + qro + (1|colony), data = rf_sub) #keep this one
rf.df2.model <- lmer(relative_fat~ treatment*whole.mean + workers_alive + duration + qro + (1|colony), data = df2)
anova(rf.int.sub, rf.g1.sub) #now the model without the interaction is better.
## Data: rf_sub
## Models:
## rf.g1.sub: relative_fat ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## rf.int.sub: relative_fat ~ treatment * whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rf.g1.sub 13 -4462.6 -4411.7 2244.3 -4488.6
## rf.int.sub 17 -4456.0 -4389.6 2245.0 -4490.0 1.4676 4 0.8324
AIC(rf.int.sub, rf.g1.sub)
## df AIC
## rf.int.sub 17 -4216.352
## rf.g1.sub 13 -4272.790
plot(resid(rf.int.sub)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(rf.int.sub));qqline(resid(rf.int.sub)) #looks much better
qqnorm(resid(rf.df2.model));qqline(resid(rf.df2.model)) #looks marginally better but not good enough to justify cutting the data
plot(resid(rf.g1.sub)) +
abline(h=0, col="red", lwd=2)
## integer(0)
qqnorm(resid(rf.g1.sub));qqline(resid(rf.g1.sub))
emtrends(rf.int.sub, pairwise ~ treatment, var = "whole.mean")
## $emtrends
## treatment whole.mean.trend SE df lower.CL upper.CL
## 1 0.000608 0.000698 22.7 -0.000836 0.00205
## 2 -0.000138 0.000886 22.1 -0.001975 0.00170
## 3 0.000759 0.000603 24.9 -0.000484 0.00200
## 4 0.000592 0.000626 21.0 -0.000710 0.00189
## 5 0.000825 0.000865 21.9 -0.000970 0.00262
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 7.45e-04 0.001042 21.6 0.715 0.9507
## treatment1 - treatment3 -1.51e-04 0.000828 20.9 -0.182 0.9997
## treatment1 - treatment4 1.61e-05 0.000975 22.1 0.017 1.0000
## treatment1 - treatment5 -2.17e-04 0.001059 21.7 -0.205 0.9996
## treatment2 - treatment3 -8.96e-04 0.001022 21.8 -0.877 0.9024
## treatment2 - treatment4 -7.29e-04 0.001094 21.6 -0.666 0.9615
## treatment2 - treatment5 -9.62e-04 0.001146 21.9 -0.840 0.9152
## treatment3 - treatment4 1.67e-04 0.000892 23.6 0.187 0.9997
## treatment3 - treatment5 -6.60e-05 0.000989 22.6 -0.067 1.0000
## treatment4 - treatment5 -2.33e-04 0.001054 21.9 -0.221 0.9994
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(emtrends(rf.int.sub, pairwise ~ treatment, var = "whole.mean"))
emmip(rf.int.sub, treatment ~ whole.mean, cov.reduce = range)
drop1(rf.g1.sub, test = "Chisq")
## Single term deletions
##
## Model:
## relative_fat ~ treatment + whole.mean + workers_alive + duration +
## qro + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> -4462.6
## treatment 4 -4454.3 16.2968 0.002646 **
## whole.mean 1 -4460.8 3.7924 0.051487 .
## workers_alive 1 -4464.6 0.0171 0.895963
## duration 1 -4456.7 7.8442 0.005098 **
## qro 3 -4465.7 2.8542 0.414653
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
submod.2 <- update(rf.g1.sub, .~. -workers_alive)
anova(rf.g1.sub, submod.2)
## Data: rf_sub
## Models:
## submod.2: relative_fat ~ treatment + whole.mean + duration + qro + (1 | colony)
## rf.g1.sub: relative_fat ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## submod.2 12 -4464.6 -4417.6 2244.3 -4488.6
## rf.g1.sub 13 -4462.6 -4411.7 2244.3 -4488.6 0.0171 1 0.896
drop1(submod.2, test = "Chisq")
## Single term deletions
##
## Model:
## relative_fat ~ treatment + whole.mean + duration + qro + (1 |
## colony)
## npar AIC LRT Pr(Chi)
## <none> -4464.6
## treatment 4 -4456.3 16.2809 0.002664 **
## whole.mean 1 -4462.8 3.7769 0.051963 .
## duration 1 -4458.7 7.8966 0.004953 **
## qro 3 -4466.3 4.2383 0.236856
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
submod.3 <- update(submod.2, .~. -qro)
anova(submod.2, submod.3)
## Data: rf_sub
## Models:
## submod.3: relative_fat ~ treatment + whole.mean + duration + (1 | colony)
## submod.2: relative_fat ~ treatment + whole.mean + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## submod.3 9 -4466.3 -4431.1 2242.2 -4484.3
## submod.2 12 -4464.6 -4417.6 2244.3 -4488.6 4.2383 3 0.2369
drop1(submod.3, test = "Chisq")
## Single term deletions
##
## Model:
## relative_fat ~ treatment + whole.mean + duration + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> -4466.3
## treatment 4 -4458.2 16.083 0.002910 **
## whole.mean 1 -4462.1 6.233 0.012539 *
## duration 1 -4460.7 7.658 0.005652 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqnorm(resid(submod.3));qqline(resid(submod.3)) #fine ish
Keep “submod.3” : relative_fat ~ treatment + whole.mean + duration + (1 | colony)
Anova(rf.g1.sub)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: relative_fat
## Chisq Df Pr(>Chisq)
## treatment 13.7929 4 0.007986 **
## whole.mean 3.1754 1 0.074754 .
## workers_alive 0.0000 1 0.997282
## duration 5.9490 1 0.014726 *
## qro 1.9773 3 0.577132
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rf.emm <- emmeans(rf.g1.sub, "treatment")
pairs(rf.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 -8.10e-06 0.000138 24.9 -0.059 1.0000
## treatment1 - treatment3 3.51e-04 0.000139 25.2 2.527 0.1162
## treatment1 - treatment4 1.37e-04 0.000136 23.2 1.010 0.8482
## treatment1 - treatment5 -1.21e-04 0.000140 23.6 -0.863 0.9076
## treatment2 - treatment3 3.59e-04 0.000137 28.5 2.613 0.0945
## treatment2 - treatment4 1.46e-04 0.000134 23.5 1.084 0.8131
## treatment2 - treatment5 -1.13e-04 0.000128 23.2 -0.879 0.9018
## treatment3 - treatment4 -2.14e-04 0.000133 23.4 -1.600 0.5118
## treatment3 - treatment5 -4.72e-04 0.000137 26.5 -3.435 0.0154
## treatment4 - treatment5 -2.58e-04 0.000133 21.5 -1.948 0.3237
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(rf.emm, comparisons = TRUE)
Anova(rf.update2)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: logrf
## Chisq Df Pr(>Chisq)
## treatment 22.7850 4 0.0001398 ***
## duration 5.1138 1 0.0237368 *
## qro 9.0967 3 0.0280322 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rf2.emm <- emmeans(rf.update2, "treatment")
pairs(rf2.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.0257 0.0909 26.9 0.283 0.9985
## treatment1 - treatment3 0.2772 0.0904 24.9 3.068 0.0376
## treatment1 - treatment4 0.0496 0.0852 22.3 0.582 0.9763
## treatment1 - treatment5 -0.1308 0.0862 23.3 -1.517 0.5624
## treatment2 - treatment3 0.2515 0.0899 29.0 2.797 0.0636
## treatment2 - treatment4 0.0239 0.0827 21.7 0.289 0.9983
## treatment2 - treatment5 -0.1564 0.0819 24.0 -1.910 0.3393
## treatment3 - treatment4 -0.2276 0.0878 23.9 -2.594 0.1036
## treatment3 - treatment5 -0.4080 0.0876 25.0 -4.657 0.0008
## treatment4 - treatment5 -0.1804 0.0805 19.5 -2.242 0.2063
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(rf2.emm, comparisons = TRUE)
Keep the rfupdate.2 model. The means comparisons matches the visualization better for this model, and it is not based on the subset data. The qqplot doesn’t look as good for this model, but it’s not terrible and I’d rather keep the whole data set
rf.sum <- rf_sub %>%
group_by(treatment) %>%
summarise(mrf = mean(relative_fat),
sdrf = sd(relative_fat),
nrf = length(relative_fat)) %>%
mutate(serf = sdrf/sqrt(nrf))
rf.sum
## # A tibble: 5 × 5
## treatment mrf sdrf nrf serf
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 0.00181 0.000663 70 0.0000792
## 2 2 0.00168 0.000559 75 0.0000645
## 3 3 0.00143 0.000582 59 0.0000758
## 4 4 0.00161 0.000565 88 0.0000603
## 5 5 0.00184 0.000490 77 0.0000558
I should be able to make a model where 1 differs from 3 and 4, 2 differs from 3, 3 differs from 1, 2, 4, and 5, and 5 differs from 4 and 3. So actually the final model I will be keeping for the analysis is rf.int. This is the original data not subset, not log transformed, and the emmeans output most closely matches what we would expect to see based on the SE from the summary of the data means
rftuk.means <- emmeans(object = rf.int,
specs = "treatment",
adjust = "Tukey",
type = "response")
rftuk.means
## treatment emmean SE df lower.CL upper.CL
## 1 0.00190 0.000110 15.8 0.00157 0.00222
## 2 0.00169 0.000120 18.6 0.00135 0.00204
## 3 0.00139 0.000124 30.9 0.00105 0.00173
## 4 0.00173 0.000110 13.8 0.00140 0.00205
## 5 0.00224 0.000118 12.1 0.00188 0.00260
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
rf.cld.model <- cld(object = rftuk.means,
adjust = "Tukey",
Letters = letters,
alpha = 0.05)
rf.cld.model
## treatment emmean SE df lower.CL upper.CL .group
## 3 0.00139 0.000124 30.9 0.00105 0.00173 a
## 2 0.00169 0.000120 18.6 0.00135 0.00204 ab
## 4 0.00173 0.000110 13.8 0.00140 0.00205 ab
## 1 0.00190 0.000110 15.8 0.00157 0.00222 bc
## 5 0.00224 0.000118 12.1 0.00188 0.00260 c
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## P value adjustment: tukey method for comparing a family of 5 estimates
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
rftuk.treatment <- as.data.frame(rf.cld.model)
rftuk.treatment
## treatment emmean SE df lower.CL upper.CL .group
## 3 3 0.001389876 0.0001241880 30.89240 0.001050062 0.001729690 a
## 2 2 0.001694930 0.0001197139 18.56017 0.001352669 0.002037191 ab
## 4 4 0.001725238 0.0001095014 13.84705 0.001399851 0.002050626 ab
## 1 1 0.001896636 0.0001104862 15.77815 0.001574420 0.002218852 bc
## 5 5 0.002243706 0.0001180008 12.05368 0.001884854 0.002602559 c
rf_max <- rf_sub %>%
group_by(treatment) %>%
summarize(maxrf = max(mean(relative_fat)))
rf_for_plotting <- full_join(rftuk.treatment, rf_max,
by="treatment")
rf <- ggplot(data = rf.sum, aes(x = treatment, y = mrf, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(0.0012, 0.002)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = mrf - serf,
ymax = mrf + serf),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Drone Relative Fat (g)",) +
ggtitle("Average Drone Relative Fat (g) per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 30)
rf <- rf +
annotate(geom = "text",
x = 1, y = 0.00199,
label = "P < 0.01",
size = 8) +
annotate(geom = "text",
x = c(3, 2, 4, 1, 5),
y = rf_for_plotting$maxrf + 0.0001,
label = c("a", "ab", "ab", "bc", "c"),
size = 8) +
theme(legend.position = "none")
rf
rf_A <- as.data.frame(Anova(rf.int))
Anova(rf.int)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: relative_fat
## Chisq Df Pr(>Chisq)
## treatment 27.2234 4 1.792e-05 ***
## whole.mean 2.8601 1 0.09080 .
## workers_alive 0.2191 1 0.63976
## duration 3.0144 1 0.08253 .
## qro 5.8911 3 0.11703
## treatment:whole.mean 7.8043 4 0.09902 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rf_A <- setDT(rf_A)
rf_A
## Chisq Df Pr(>Chisq)
## 1: 27.2233642 4 1.791521e-05
## 2: 2.8600678 1 9.080367e-02
## 3: 0.2190542 1 6.397615e-01
## 4: 3.0144209 1 8.252693e-02
## 5: 5.8911092 3 1.170299e-01
## 6: 7.8042932 4 9.901604e-02
rf_emm <- emmeans(rf.int, pairwise ~ treatment | whole.mean)
rf_contrasts <- as.data.frame(rf_emm$contrasts)
rf_means <- as.data.frame(rf_emm$emmeans)
rf_means <- setDT(rf_means)
rf_means
## treatment whole.mean emmean SE df lower.CL
## 1: 1 0.5698509 0.001896636 0.0001104862 15.77815 0.001662147
## 2: 2 0.5698509 0.001694930 0.0001197139 18.56017 0.001443963
## 3: 3 0.5698509 0.001389876 0.0001241880 30.89240 0.001136557
## 4: 4 0.5698509 0.001725238 0.0001095014 13.84705 0.001490138
## 5: 5 0.5698509 0.002243706 0.0001180008 12.05368 0.001986732
## upper.CL
## 1: 0.002131124
## 2: 0.001945897
## 3: 0.001643195
## 4: 0.001960339
## 5: 0.002500681
rf_contrasts
## whole.mean = 0.57:
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 2.02e-04 0.000163 17.7 1.239 0.7294
## treatment1 - treatment3 5.07e-04 0.000164 20.4 3.098 0.0397
## treatment1 - treatment4 1.71e-04 0.000156 17.0 1.096 0.8060
## treatment1 - treatment5 -3.47e-04 0.000164 13.9 -2.118 0.2664
## treatment2 - treatment3 3.05e-04 0.000171 26.8 1.788 0.4008
## treatment2 - treatment4 -3.03e-05 0.000158 15.7 -0.192 0.9997
## treatment2 - treatment5 -5.49e-04 0.000160 14.4 -3.439 0.0269
## treatment3 - treatment4 -3.35e-04 0.000156 20.4 -2.150 0.2379
## treatment3 - treatment5 -8.54e-04 0.000163 17.8 -5.226 0.0005
## treatment4 - treatment5 -5.18e-04 0.000154 11.8 -3.358 0.0381
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
rf.sum <- setDT(rf.sum)
rf.sum
## treatment mrf sdrf nrf serf
## 1: 1 0.001812857 0.0006628707 70 7.922820e-05
## 2: 2 0.001680000 0.0005587147 75 6.451482e-05
## 3: 3 0.001428814 0.0005819263 59 7.576035e-05
## 4: 4 0.001614773 0.0005653886 88 6.027063e-05
## 5: 5 0.001835065 0.0004895665 77 5.579128e-05
rc.drones <- subset(drf.pollen, select = c(radial, treatment, whole.mean, workers_alive, alive, duration, qro, colony))
rc.drones <- na.omit(rc.drones)
ggplot(rc.drones, aes(x=radial, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.05,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Radial Cell Length") +
labs(y = "Count", x = "Length (mm) ")
shapiro.test(rc.drones$radial)
##
## Shapiro-Wilk normality test
##
## data: rc.drones$radial
## W = 0.98453, p-value = 0.000244
range(rc.drones$radial)
## [1] 1.6924 3.1542
rc.drones$radsquare <- (rc.drones$radial)^2
shapiro.test(rc.drones$radsquare)
##
## Shapiro-Wilk normality test
##
## data: rc.drones$radsquare
## W = 0.99253, p-value = 0.0397
ggplot(rc.drones, aes(x=radsquare, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 0.5,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Radial Cell Length ^2 ") +
labs(y = "Count", x = "Length ^2 (mm) ")
rad.model <- lm(radial~ treatment + whole.mean + workers_alive + duration + qro, data = rc.drones)
vif(rad.model)
## GVIF Df GVIF^(1/(2*Df))
## treatment 2.020243 4 1.091881
## whole.mean 1.737422 1 1.318113
## workers_alive 1.966186 1 1.402208
## duration 1.418566 1 1.191036
## qro 2.964081 3 1.198528
#No colinearity
rad.int <- lmer(radsquare~ treatment*whole.mean + workers_alive + duration + qro + (1|colony), data = rc.drones)
rad.g1 <- lmer(radsquare~ treatment + whole.mean + workers_alive + duration + qro + (1|colony), data = rc.drones) #this is the model we are keeping
rad.g2 <- lmer(radsquare~ whole.mean + workers_alive + duration + qro + (1|colony), data = rc.drones)
anova(rad.int, rad.g1)
## Data: rc.drones
## Models:
## rad.g1: radsquare ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## rad.int: radsquare ~ treatment * whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rad.g1 13 1137.5 1189.7 -555.77 1111.5
## rad.int 17 1143.4 1211.5 -554.70 1109.4 2.1546 4 0.7073
anova(rad.g1, rad.g2)
## Data: rc.drones
## Models:
## rad.g2: radsquare ~ whole.mean + workers_alive + duration + qro + (1 | colony)
## rad.g1: radsquare ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rad.g2 9 1139.3 1175.3 -560.64 1121.3
## rad.g1 13 1137.5 1189.7 -555.77 1111.5 9.7236 4 0.04535 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1(rad.g1, test = "Chisq")
## Single term deletions
##
## Model:
## radsquare ~ treatment + whole.mean + workers_alive + duration +
## qro + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> 1137.5
## treatment 4 1139.3 9.7236 0.04535 *
## whole.mean 1 1135.6 0.0415 0.83864
## workers_alive 1 1136.0 0.4346 0.50974
## duration 1 1138.5 2.9488 0.08594 .
## qro 3 1135.4 3.8350 0.27984
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rad.g1.update <- update(rad.g1, .~. -whole.mean)
anova(rad.g1.update, rad.g1)
## Data: rc.drones
## Models:
## rad.g1.update: radsquare ~ treatment + workers_alive + duration + qro + (1 | colony)
## rad.g1: radsquare ~ treatment + whole.mean + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rad.g1.update 12 1135.6 1183.7 -555.80 1111.6
## rad.g1 13 1137.5 1189.7 -555.77 1111.5 0.0415 1 0.8386
drop1(rad.g1.update, test = "Chisq")
## Single term deletions
##
## Model:
## radsquare ~ treatment + workers_alive + duration + qro + (1 |
## colony)
## npar AIC LRT Pr(Chi)
## <none> 1135.6
## treatment 4 1137.3 9.6838 0.04611 *
## workers_alive 1 1134.0 0.4178 0.51805
## duration 1 1136.6 2.9715 0.08474 .
## qro 3 1133.6 3.9915 0.26239
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rad.g1.update2 <- update(rad.g1.update, .~. -workers_alive)
anova(rad.g1.update2, rad.g1.update)
## Data: rc.drones
## Models:
## rad.g1.update2: radsquare ~ treatment + duration + qro + (1 | colony)
## rad.g1.update: radsquare ~ treatment + workers_alive + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rad.g1.update2 11 1134.0 1178.1 -556.0 1112.0
## rad.g1.update 12 1135.6 1183.7 -555.8 1111.6 0.4178 1 0.518
drop1(rad.g1.update2, test = "Chisq")
## Single term deletions
##
## Model:
## radsquare ~ treatment + duration + qro + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> 1134.0
## treatment 4 1135.5 9.4634 0.05050 .
## duration 1 1134.7 2.6867 0.10119
## qro 3 1136.3 8.3357 0.03956 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rad.g1.update3 <- update(rad.g1.update2, .~. -duration)
anova(rad.g1.update3, rad.g1.update2)
## Data: rc.drones
## Models:
## rad.g1.update3: radsquare ~ treatment + qro + (1 | colony)
## rad.g1.update2: radsquare ~ treatment + duration + qro + (1 | colony)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## rad.g1.update3 10 1134.7 1174.8 -557.35 1114.7
## rad.g1.update2 11 1134.0 1178.1 -556.00 1112.0 2.6867 1 0.1012
drop1(rad.g1.update3, test = "Chisq")
## Single term deletions
##
## Model:
## radsquare ~ treatment + qro + (1 | colony)
## npar AIC LRT Pr(Chi)
## <none> 1134.7
## treatment 4 1136.0 9.3429 0.05308 .
## qro 3 1135.5 6.8008 0.07853 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqnorm(resid(rad.g1.update3));qqline(resid(rad.g1.update3))
plot(resid(rad.g1.update3)) +
abline(h=0, col="red", lwd=2)
## integer(0)
#yay! :)
summary(rad.g1.update3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: radsquare ~ treatment + qro + (1 | colony)
## Data: rc.drones
##
## REML criterion at convergence: 1130.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3472 -0.6072 0.0182 0.6166 4.0743
##
## Random effects:
## Groups Name Variance Std.Dev.
## colony (Intercept) 0.07885 0.2808
## Residual 0.87347 0.9346
## Number of obs: 407, groups: colony, 40
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.11568 0.18028 33.924
## treatment2 -0.15929 0.21767 -0.732
## treatment3 -0.57036 0.22613 -2.522
## treatment4 -0.08059 0.21442 -0.376
## treatment5 -0.08936 0.21684 -0.412
## qroB3 -0.07460 0.22968 -0.325
## qroB4 0.18359 0.21448 0.856
## qroB5 0.35329 0.16412 2.153
##
## Correlation of Fixed Effects:
## (Intr) trtmn2 trtmn3 trtmn4 trtmn5 qroB3 qroB4
## treatment2 -0.689
## treatment3 -0.596 0.488
## treatment4 -0.687 0.529 0.492
## treatment5 -0.692 0.529 0.487 0.528
## qroB3 -0.320 0.095 -0.003 0.036 0.180
## qroB4 -0.358 0.097 0.007 0.199 0.085 0.204
## qroB5 -0.415 0.140 0.037 0.063 0.100 0.274 0.283
Anova(rad.g1.update3)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: radsquare
## Chisq Df Pr(>Chisq)
## treatment 7.7130 4 0.1027
## qro 5.6914 3 0.1276
rad.emm <- emmeans(rad.g1.update3, "treatment")
pairs(rad.emm)
## contrast estimate SE df t.ratio p.value
## treatment1 - treatment2 0.15929 0.219 26.8 0.729 0.9479
## treatment1 - treatment3 0.57036 0.228 28.2 2.506 0.1177
## treatment1 - treatment4 0.08059 0.215 23.2 0.374 0.9955
## treatment1 - treatment5 0.08936 0.218 24.9 0.411 0.9936
## treatment2 - treatment3 0.41107 0.226 30.8 1.817 0.3826
## treatment2 - treatment4 -0.07870 0.211 24.8 -0.374 0.9956
## treatment2 - treatment5 -0.06994 0.211 26.2 -0.331 0.9973
## treatment3 - treatment4 -0.48977 0.224 26.9 -2.187 0.2150
## treatment3 - treatment5 -0.48100 0.226 28.6 -2.127 0.2364
## treatment4 - treatment5 0.00877 0.211 22.9 0.042 1.0000
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
rad.sum <- rc.drones %>%
group_by(treatment) %>%
summarise(rad.m = mean(radial),
rad.sd = sd(radial),
rad.n = length(radial)) %>%
mutate(serad = rad.sd/sqrt(rad.n))
rad.sum
## # A tibble: 5 × 5
## treatment rad.m rad.sd rad.n serad
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 2.51 0.161 79 0.0182
## 2 2 2.45 0.183 76 0.0209
## 3 3 2.36 0.243 62 0.0309
## 4 4 2.47 0.231 104 0.0227
## 5 5 2.48 0.165 86 0.0178
radanova_summary <- summary(rad.g1.update3)
radtuk.means <- emmeans(object = rad.g1.update3,
specs = "treatment",
adjust = "Tukey",
type = "response")
radtuk.means
## treatment emmean SE df lower.CL upper.CL
## 1 6.23 0.157 23.8 5.79 6.67
## 2 6.07 0.158 28.6 5.64 6.51
## 3 5.66 0.168 31.1 5.20 6.12
## 4 6.15 0.155 20.3 5.71 6.59
## 5 6.14 0.160 24.3 5.70 6.59
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
rad.cld.model <- cld(object = radtuk.means,
adjust = "Tukey",
Letters = letters,
alpha = 0.05)
rad.cld.model
## treatment emmean SE df lower.CL upper.CL .group
## 3 5.66 0.168 31.1 5.20 6.12 a
## 2 6.07 0.158 28.6 5.64 6.51 a
## 5 6.14 0.160 24.3 5.70 6.59 a
## 4 6.15 0.155 20.3 5.71 6.59 a
## 1 6.23 0.157 23.8 5.79 6.67 a
##
## Results are averaged over the levels of: qro
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## P value adjustment: tukey method for comparing a family of 5 estimates
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
radtuk.treatment <- as.data.frame(rad.cld.model)
rad_max <- rc.drones %>%
group_by(treatment) %>%
summarize(maxrad = max(mean(radial)))
rad_for_plotting <- full_join(radtuk.treatment, rad_max,
by="treatment")
radp <- ggplot(data = rad.sum, aes(x = treatment, y = rad.m, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(2,2.55)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = rad.m - serad,
ymax = rad.m + serad),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Drone Radial Cell Length",) +
ggtitle("Average Drone Radial Cell Length (mm) per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 30) +
theme(legend.position = "none")
radp <- radp +
annotate(geom = "text",
x = 2, y = 2.54,
label = "P > 0.1",
size = 12) +
annotate(geom = "text",
x = c(3, 2, 5, 4, 1),
y = rad_for_plotting$maxrad + 0.05,
label = c("a", "a", "a", "a", "a"),
size = 12) +
theme(legend.position = "none")
radp
rad.g1.update3
## Linear mixed model fit by REML ['lmerMod']
## Formula: radsquare ~ treatment + qro + (1 | colony)
## Data: rc.drones
## REML criterion at convergence: 1130.311
## Random effects:
## Groups Name Std.Dev.
## colony (Intercept) 0.2808
## Residual 0.9346
## Number of obs: 407, groups: colony, 40
## Fixed Effects:
## (Intercept) treatment2 treatment3 treatment4 treatment5 qroB3
## 6.11568 -0.15929 -0.57036 -0.08059 -0.08936 -0.07460
## qroB4 qroB5
## 0.18359 0.35329
rad_A <- as.data.frame(Anova(rad.g1.update3))
rad_A <- setDT(rad_A)
rad_A
## Chisq Df Pr(>Chisq)
## 1: 7.713042 4 0.1026738
## 2: 5.691425 3 0.1276272
rad_emm <- emmeans(rad.g1.update3, pairwise ~ treatment)
rad_contrasts <- as.data.frame(rad_emm$contrasts)
rad_contrasts <- setDT(rad_contrasts)
rad_contrasts
## contrast estimate SE df t.ratio
## 1: treatment1 - treatment2 0.159291661 0.2185811 26.82775 0.72875315
## 2: treatment1 - treatment3 0.570356861 0.2275743 28.22011 2.50624471
## 3: treatment1 - treatment4 0.080588327 0.2154953 23.24891 0.37396799
## 4: treatment1 - treatment5 0.089356194 0.2176651 24.85426 0.41052153
## 5: treatment2 - treatment3 0.411065201 0.2262541 30.79816 1.81682950
## 6: treatment2 - treatment4 -0.078703333 0.2107108 24.76664 -0.37351345
## 7: treatment2 - treatment5 -0.069935467 0.2114946 26.25425 -0.33067254
## 8: treatment3 - treatment4 -0.489768534 0.2239194 26.85584 -2.18725366
## 9: treatment3 - treatment5 -0.481000667 0.2260942 28.59779 -2.12743525
## 10: treatment4 - treatment5 0.008767867 0.2105243 22.94982 0.04164777
## p.value
## 1: 0.9478777
## 2: 0.1176911
## 3: 0.9955388
## 4: 0.9936479
## 5: 0.3825667
## 6: 0.9955783
## 7: 0.9972502
## 8: 0.2150167
## 9: 0.2364409
## 10: 0.9999993
rad_means <- as.data.frame(rad_emm$emmeans)
rad_means <- setDT(rad_means)
rad_means
## treatment emmean SE df lower.CL upper.CL
## 1: 1 6.231249 0.1569505 23.84265 5.907206 6.555292
## 2: 2 6.071957 0.1584968 28.58486 5.747590 6.396324
## 3: 3 5.660892 0.1679997 31.07016 5.318286 6.003498
## 4: 4 6.150660 0.1552761 20.28489 5.827052 6.474269
## 5: 5 6.141893 0.1604075 24.32014 5.811058 6.472727
trunc.csv <- read_csv("duration.fortrunc.csv",
col_types = cols(round = col_factor(levels = c("1",
"2")), treatment = col_factor(levels = c("1",
"2", "3", "4", "5")), replicate = col_factor(levels = c("1",
"2", "3", "4", "5", "7", "9", "11",
"12")), start_date = col_date(format = "%m/%d/%Y"),
emerge_date = col_date(format = "%m/%d/%Y"),
end_date = col_date(format = "%m/%d/%Y"))) # this data set is for drone counts
trunc <- merge(wd.summary, trunc.csv, by.x = "colony")
trunc <- merge(mortality, trunc, by.x = "colony")
trunc$qro <- as.factor(trunc$origin)
count.drones <- subset(trunc, select = c(count, treatment, whole.mean, alive, duration, qro, colony))
count.drones <- na.omit(count.drones)
ggplot(count.drones, aes(x=count, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 1 ,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Drone Count") +
labs(y = "Count", x = "Number of Drones")
count.model <- lm(count~ treatment + whole.mean + alive + duration + qro, data = count.drones)
vif(count.model)
## GVIF Df GVIF^(1/(2*Df))
## treatment 1.443153 4 1.046921
## whole.mean 2.049137 1 1.431481
## alive 2.511104 1 1.584646
## duration 2.084378 1 1.443738
## qro 1.897492 3 1.112662
count.pois <- glm(count ~ treatment + whole.mean +alive +duration + qro, data = count.drones, family = "poisson")
summary(count.pois) #overdispersed
##
## Call:
## glm(formula = count ~ treatment + whole.mean + alive + duration +
## qro, family = "poisson", data = count.drones)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8240 -1.5124 -0.6275 0.9664 2.7418
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.15115 0.64327 -1.790 0.073528 .
## treatment2 -0.06390 0.15584 -0.410 0.681776
## treatment3 -0.45881 0.16131 -2.844 0.004452 **
## treatment4 -0.02662 0.16024 -0.166 0.868060
## treatment5 0.13738 0.16634 0.826 0.408849
## whole.mean 3.04430 0.40744 7.472 7.91e-14 ***
## alive 0.15824 0.06218 2.545 0.010933 *
## duration 0.02629 0.01308 2.010 0.044444 *
## qroB3 0.25413 0.15904 1.598 0.110063
## qroB4 -0.06042 0.18029 -0.335 0.737534
## qroB5 0.46523 0.13301 3.498 0.000469 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.44 on 44 degrees of freedom
## Residual deviance: 107.43 on 34 degrees of freedom
## AIC: 291.43
##
## Number of Fisher Scoring iterations: 5
count.pois.int <- glm(count ~ treatment*whole.mean +alive +duration + qro, data = count.drones, family = "poisson")
summary(count.pois.int) # tiny bit better but still over dispersed
##
## Call:
## glm(formula = count ~ treatment * whole.mean + alive + duration +
## qro, family = "poisson", data = count.drones)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6978 -1.3029 -0.3356 0.8896 3.0905
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.76021 0.98150 -2.812 0.004920 **
## treatment2 0.53851 0.70539 0.763 0.445218
## treatment3 0.23181 0.57204 0.405 0.685301
## treatment4 1.50047 0.59796 2.509 0.012096 *
## treatment5 0.50654 0.56511 0.896 0.370065
## whole.mean 4.61959 0.85370 5.411 6.26e-08 ***
## alive 0.16773 0.07043 2.381 0.017243 *
## duration 0.04214 0.01542 2.732 0.006286 **
## qroB3 0.31250 0.17676 1.768 0.077070 .
## qroB4 -0.17270 0.19161 -0.901 0.367412
## qroB5 0.54421 0.14467 3.762 0.000169 ***
## treatment2:whole.mean -1.04982 1.24490 -0.843 0.399064
## treatment3:whole.mean -1.27992 0.93366 -1.371 0.170416
## treatment4:whole.mean -2.57286 0.97332 -2.643 0.008208 **
## treatment5:whole.mean -0.55282 1.00544 -0.550 0.582440
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.444 on 44 degrees of freedom
## Residual deviance: 99.683 on 30 degrees of freedom
## AIC: 291.69
##
## Number of Fisher Scoring iterations: 5
count.int <- glm.nb(count~ treatment*whole.mean + alive + duration + qro, data = count.drones)
count.g1 <- glm.nb(count~ treatment + whole.mean + alive + duration + qro , data = count.drones) #this is the model we are keeping
count.g2 <- glm.nb(count~ treatment + alive + duration + qro, data = count.drones)
count.g3 <- glm.nb(count~ whole.mean + alive + duration + qro, data = count.drones)
anova(count.int, count.g1)
## Likelihood ratio tests of Negative Binomial Models
##
## Response: count
## Model theta Resid. df
## 1 treatment + whole.mean + alive + duration + qro 6.877672 34
## 2 treatment * whole.mean + alive + duration + qro 7.979431 30
## 2 x log-lik. Test df LR stat. Pr(Chi)
## 1 -251.3451
## 2 -247.4647 1 vs 2 4 3.880433 0.4224293
anova(count.g1, count.g2)
## Likelihood ratio tests of Negative Binomial Models
##
## Response: count
## Model theta Resid. df
## 1 treatment + alive + duration + qro 2.882863 35
## 2 treatment + whole.mean + alive + duration + qro 6.877672 34
## 2 x log-lik. Test df LR stat. Pr(Chi)
## 1 -275.2316
## 2 -251.3451 1 vs 2 1 23.8865 1.021861e-06
anova(count.g1, count.g3)
## Likelihood ratio tests of Negative Binomial Models
##
## Response: count
## Model theta Resid. df
## 1 whole.mean + alive + duration + qro 5.201726 38
## 2 treatment + whole.mean + alive + duration + qro 6.877672 34
## 2 x log-lik. Test df LR stat. Pr(Chi)
## 1 -258.0562
## 2 -251.3451 1 vs 2 4 6.711111 0.1519653
AIC(count.g1, count.g3)
## df AIC
## count.g1 12 275.3451
## count.g3 8 274.0562
AIC(count.g1, count.int)
## df AIC
## count.g1 12 275.3451
## count.int 16 279.4647
drop1(count.g1, test = "Chisq")
## Single term deletions
##
## Model:
## count ~ treatment + whole.mean + alive + duration + qro
## Df Deviance AIC LRT Pr(>Chi)
## <none> 52.237 273.35
## treatment 4 59.450 272.56 7.2135 0.125026
## whole.mean 1 82.877 301.99 30.6403 3.106e-08 ***
## alive 1 60.595 279.70 8.3586 0.003839 **
## duration 1 53.307 272.42 1.0701 0.300918
## qro 3 65.299 280.41 13.0619 0.004505 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
count.g1.update <- update(count.g1, .~. -duration)
anova(count.g1, count.g1.update)
## Likelihood ratio tests of Negative Binomial Models
##
## Response: count
## Model theta Resid. df
## 1 treatment + whole.mean + alive + qro 6.376087 35
## 2 treatment + whole.mean + alive + duration + qro 6.877672 34
## 2 x log-lik. Test df LR stat. Pr(Chi)
## 1 -252.3811
## 2 -251.3451 1 vs 2 1 1.03604 0.3087442
drop1(count.g1.update, test = "Chisq") #get rid of duration
## Single term deletions
##
## Model:
## count ~ treatment + whole.mean + alive + qro
## Df Deviance AIC LRT Pr(>Chi)
## <none> 51.476 272.38
## treatment 4 57.689 270.60 6.213 0.1837678
## whole.mean 1 83.444 302.35 31.968 1.568e-08 ***
## alive 1 65.863 284.77 14.387 0.0001488 ***
## qro 3 67.517 282.42 16.041 0.0011120 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqnorm(resid(count.g1.update));qqline(resid(count.g1.update))
plot(resid(count.g1.update)) +
abline(h=0, col="red", lwd=2)
## integer(0)
#these are actually not bad
summary(count.g1.update)
##
## Call:
## glm.nb(formula = count ~ treatment + whole.mean + alive + qro,
## data = count.drones, init.theta = 6.376086876, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1372 -1.1678 -0.2329 0.4935 1.9080
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.880940 0.440731 -1.999 0.045628 *
## treatment2 -0.184572 0.255323 -0.723 0.469744
## treatment3 -0.547470 0.262738 -2.084 0.037187 *
## treatment4 -0.087990 0.260836 -0.337 0.735860
## treatment5 0.006537 0.265187 0.025 0.980334
## whole.mean 3.250836 0.567554 5.728 1.02e-08 ***
## alive 0.327408 0.088239 3.710 0.000207 ***
## qroB3 0.180576 0.267796 0.674 0.500116
## qroB4 -0.068601 0.314774 -0.218 0.827478
## qroB5 0.813970 0.217973 3.734 0.000188 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(6.3761) family taken to be 1)
##
## Null deviance: 132.224 on 44 degrees of freedom
## Residual deviance: 51.476 on 35 degrees of freedom
## AIC: 274.38
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 6.38
## Std. Err.: 2.36
##
## 2 x log-likelihood: -252.381
Anova(count.g1.update)
## Analysis of Deviance Table (Type II tests)
##
## Response: count
## LR Chisq Df Pr(>Chisq)
## treatment 6.213 4 0.1837678
## whole.mean 31.968 1 1.568e-08 ***
## alive 14.387 1 0.0001488 ***
## qro 16.041 3 0.0011120 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
em.count <- emmeans(count.g1.update, "treatment")
pairs(em.count)
## contrast estimate SE df z.ratio p.value
## treatment1 - treatment2 0.18457 0.255 Inf 0.723 0.9513
## treatment1 - treatment3 0.54747 0.263 Inf 2.084 0.2271
## treatment1 - treatment4 0.08799 0.261 Inf 0.337 0.9972
## treatment1 - treatment5 -0.00654 0.265 Inf -0.025 1.0000
## treatment2 - treatment3 0.36290 0.255 Inf 1.426 0.6110
## treatment2 - treatment4 -0.09658 0.255 Inf -0.379 0.9956
## treatment2 - treatment5 -0.19111 0.253 Inf -0.756 0.9431
## treatment3 - treatment4 -0.45948 0.258 Inf -1.783 0.3837
## treatment3 - treatment5 -0.55401 0.260 Inf -2.134 0.2058
## treatment4 - treatment5 -0.09453 0.266 Inf -0.355 0.9966
##
## Results are averaged over the levels of: qro
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(em.count, comparisons = TRUE)
count.sum <- count.drones %>%
group_by(treatment) %>%
summarise(count.m = mean(count),
count.sd = sd(count),
count.n = length(count)) %>%
mutate(secount = count.sd/sqrt(count.n))
count.sum
## # A tibble: 5 × 5
## treatment count.m count.sd count.n secount
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 9.67 7.47 9 2.49
## 2 2 9.78 6.67 9 2.22
## 3 3 8.67 6.76 9 2.25
## 4 4 12.2 9.13 9 3.04
## 5 5 10.9 6.92 9 2.31
countanova_summary <- summary(count.g1.update)
counttuk.means <- emmeans(object = count.g1.update,
specs = "treatment",
adjust = "Tukey",
type = "response")
counttuk.means
## treatment response SE df asymp.LCL asymp.UCL
## 1 9.57 1.85 Inf 5.82 15.73
## 2 7.96 1.51 Inf 4.89 12.95
## 3 5.54 1.09 Inf 3.33 9.19
## 4 8.76 1.83 Inf 5.13 14.98
## 5 9.63 1.86 Inf 5.87 15.82
##
## Results are averaged over the levels of: qro
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## Intervals are back-transformed from the log scale
count.cld.model <- cld(object = counttuk.means,
adjust = "Tukey",
Letters = letters,
alpha = 0.05)
count.cld.model
## treatment response SE df asymp.LCL asymp.UCL .group
## 3 5.54 1.09 Inf 3.33 9.19 a
## 2 7.96 1.51 Inf 4.89 12.95 a
## 4 8.76 1.83 Inf 5.13 14.98 a
## 1 9.57 1.85 Inf 5.82 15.73 a
## 5 9.63 1.86 Inf 5.87 15.82 a
##
## Results are averaged over the levels of: qro
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## Intervals are back-transformed from the log scale
## P value adjustment: tukey method for comparing a family of 5 estimates
## Tests are performed on the log scale
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
counttuk.means <- as.data.frame(counttuk.means)
countuk.treatment <- as.data.frame(count.cld.model)
count_max <- count.drones %>%
group_by(treatment) %>%
summarize(maxcount = max(mean(count)))
count_for_plotting <- full_join(countuk.treatment, count_max,
by="treatment")
ggplot(data = count.sum, aes(x = treatment, y = count.m, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(1,17.5)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = count.m-secount,
ymax = count.m+secount),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Mean Drone Count",) +
ggtitle("Average Drone Count per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 30) +
theme(legend.position = "none") +
annotate(geom = "text",
x = 1.5, y = 16,
label = "P > 0.05",
size = 15) +
annotate(geom = "text",
x = c(3, 2, 4, 1, 5),
y = count_for_plotting$maxcount + 3.5,
label = c("a", "a", "a", "a", "a"),
size = 12) +
theme(legend.position = "none")
count.g1.update
##
## Call: glm.nb(formula = count ~ treatment + whole.mean + alive + qro,
## data = count.drones, init.theta = 6.376086876, link = log)
##
## Coefficients:
## (Intercept) treatment2 treatment3 treatment4 treatment5 whole.mean
## -0.880940 -0.184572 -0.547470 -0.087990 0.006537 3.250836
## alive qroB3 qroB4 qroB5
## 0.327408 0.180576 -0.068601 0.813970
##
## Degrees of Freedom: 44 Total (i.e. Null); 35 Residual
## Null Deviance: 132.2
## Residual Deviance: 51.48 AIC: 274.4
count_A <- as.data.frame(Anova(count.g1.update))
count_A <- setDT(count_A)
count_A
## LR Chisq Df Pr(>Chisq)
## 1: 6.213405 4 1.837678e-01
## 2: 31.967787 1 1.567504e-08
## 3: 14.386875 1 1.488361e-04
## 4: 16.041405 3 1.112033e-03
Anova(count.g1.update)
## Analysis of Deviance Table (Type II tests)
##
## Response: count
## LR Chisq Df Pr(>Chisq)
## treatment 6.213 4 0.1837678
## whole.mean 31.968 1 1.568e-08 ***
## alive 14.387 1 0.0001488 ***
## qro 16.041 3 0.0011120 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
count_emm <- emmeans(count.g1.update, pairwise ~ treatment)
count_contrasts <- as.data.frame(count_emm$contrasts)
count_contrasts <- setDT(count_contrasts)
count_contrasts
## contrast estimate SE df z.ratio p.value
## 1: treatment1 - treatment2 0.184571649 0.2553227 Inf 0.72289543 0.9512914
## 2: treatment1 - treatment3 0.547470482 0.2627383 Inf 2.08371021 0.2270542
## 3: treatment1 - treatment4 0.087990455 0.2608359 Inf 0.33734025 0.9972171
## 4: treatment1 - treatment5 -0.006536866 0.2651874 Inf -0.02464999 0.9999999
## 5: treatment2 - treatment3 0.362898833 0.2545568 Inf 1.42561040 0.6110446
## 6: treatment2 - treatment4 -0.096581194 0.2549618 Inf -0.37880646 0.9956356
## 7: treatment2 - treatment5 -0.191108515 0.2529530 Inf -0.75551003 0.9431299
## 8: treatment3 - treatment4 -0.459480027 0.2577608 Inf -1.78258336 0.3837289
## 9: treatment3 - treatment5 -0.554007348 0.2596596 Inf -2.13359054 0.2057703
## 10: treatment4 - treatment5 -0.094527321 0.2659881 Inf -0.35538180 0.9965921
count_means <- as.data.frame(count_emm$emmeans)
count_means <- setDT(count_means)
count_means
## treatment emmean SE df asymp.LCL asymp.UCL
## 1: 1 2.258580 0.1935822 Inf 1.879166 2.637994
## 2: 2 2.074009 0.1897322 Inf 1.702140 2.445877
## 3: 3 1.711110 0.1973104 Inf 1.324389 2.097831
## 4: 4 2.170590 0.2086615 Inf 1.761621 2.579559
## 5: 5 2.265117 0.1931262 Inf 1.886597 2.643638
duration <- subset(trunc, select = c(count, treatment, whole.mean, alive, duration, qro, colony))
ggplot(duration, aes(x=duration, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 1 ,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Colony Duration") +
labs(y = "Count", x = "Days")
descdist(duration$duration, discrete = TRUE)
## summary statistics
## ------
## min: 20 max: 58
## median: 42
## mean: 42.46667
## estimated sd: 6.542171
## estimated skewness: -0.2500756
## estimated kurtosis: 6.629389
duration$square.dur <- (duration$duration)^2
descdist(duration$square.dur, discrete = TRUE)
## summary statistics
## ------
## min: 400 max: 3364
## median: 1764
## mean: 1845.267
## estimated sd: 553.6501
## estimated skewness: 0.8579907
## estimated kurtosis: 5.719182
dur.model <- lm(duration~ treatment + whole.mean + alive + qro, data = duration)
vif(dur.model)
## GVIF Df GVIF^(1/(2*Df))
## treatment 1.144322 4 1.016994
## whole.mean 1.641378 1 1.281163
## alive 1.708799 1 1.307210
## qro 1.550084 3 1.075786
dur.pois <- glm(duration ~ treatment + whole.mean + alive + qro, data = duration, family = "poisson")
summary(count.pois) #overdispersed
##
## Call:
## glm(formula = count ~ treatment + whole.mean + alive + duration +
## qro, family = "poisson", data = count.drones)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8240 -1.5124 -0.6275 0.9664 2.7418
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.15115 0.64327 -1.790 0.073528 .
## treatment2 -0.06390 0.15584 -0.410 0.681776
## treatment3 -0.45881 0.16131 -2.844 0.004452 **
## treatment4 -0.02662 0.16024 -0.166 0.868060
## treatment5 0.13738 0.16634 0.826 0.408849
## whole.mean 3.04430 0.40744 7.472 7.91e-14 ***
## alive 0.15824 0.06218 2.545 0.010933 *
## duration 0.02629 0.01308 2.010 0.044444 *
## qroB3 0.25413 0.15904 1.598 0.110063
## qroB4 -0.06042 0.18029 -0.335 0.737534
## qroB5 0.46523 0.13301 3.498 0.000469 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.44 on 44 degrees of freedom
## Residual deviance: 107.43 on 34 degrees of freedom
## AIC: 291.43
##
## Number of Fisher Scoring iterations: 5
dur.int <- glm.nb(duration~ treatment*whole.mean + alive + qro, data = duration)
dur.g1 <- glm.nb(duration~ treatment + whole.mean + alive + qro, data = duration)
drop1(dur.g1, test = "Chisq")
## Single term deletions
##
## Model:
## duration ~ treatment + whole.mean + alive + qro
## Df Deviance AIC LRT Pr(>Chi)
## <none> 22.659 293.65
## treatment 4 28.258 291.25 5.5995 0.231122
## whole.mean 1 27.591 296.58 4.9320 0.026363 *
## alive 1 32.902 301.90 10.2434 0.001372 **
## qro 3 27.751 292.75 5.0917 0.165204
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dur.g2 <- glm.nb(duration~ treatment + alive + qro, data = duration)
dur.g3 <- glm.nb(duration~ whole.mean + alive + qro, data = duration)
# Switch to squared data below to get rid of warning messages
dur.pois <- glm(square.dur ~ treatment + whole.mean + alive + qro, data = duration, family = "poisson")
summary(count.pois) #overdispersed
##
## Call:
## glm(formula = count ~ treatment + whole.mean + alive + duration +
## qro, family = "poisson", data = count.drones)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8240 -1.5124 -0.6275 0.9664 2.7418
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.15115 0.64327 -1.790 0.073528 .
## treatment2 -0.06390 0.15584 -0.410 0.681776
## treatment3 -0.45881 0.16131 -2.844 0.004452 **
## treatment4 -0.02662 0.16024 -0.166 0.868060
## treatment5 0.13738 0.16634 0.826 0.408849
## whole.mean 3.04430 0.40744 7.472 7.91e-14 ***
## alive 0.15824 0.06218 2.545 0.010933 *
## duration 0.02629 0.01308 2.010 0.044444 *
## qroB3 0.25413 0.15904 1.598 0.110063
## qroB4 -0.06042 0.18029 -0.335 0.737534
## qroB5 0.46523 0.13301 3.498 0.000469 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 275.44 on 44 degrees of freedom
## Residual deviance: 107.43 on 34 degrees of freedom
## AIC: 291.43
##
## Number of Fisher Scoring iterations: 5
dur.int <- glm.nb(square.dur~ treatment*whole.mean + alive + qro, data = duration)
dur.g1 <- glm.nb(square.dur~ treatment + whole.mean + alive + qro, data = duration)
drop1(dur.g1, test = "F")
## Single term deletions
##
## Model:
## square.dur ~ treatment + whole.mean + alive + qro
## Df Deviance AIC F value Pr(>F)
## <none> 45.458 684.39
## treatment 4 58.601 689.54 2.5298 0.0578470 .
## whole.mean 1 55.290 692.23 7.5698 0.0093366 **
## alive 1 67.018 703.95 16.6000 0.0002517 ***
## qro 3 56.324 689.26 2.7888 0.0549046 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dur.g2 <- glm.nb(square.dur~ treatment + alive + qro, data = duration)
dur.g3 <- glm.nb(square.dur~ whole.mean + alive + qro, data = duration) # stick with this one?
dur.g4 <- glm.nb(square.dur ~ treatment, data = duration)
AIC(dur.pois, dur.g1)
## df AIC
## dur.pois 10 4199.0536
## dur.g1 11 686.3947
AIC(dur.g1, dur.g2, dur.g3, dur.g4)
## df AIC
## dur.g1 11 686.3947
## dur.g2 10 693.2944
## dur.g3 7 689.9570
## dur.g4 6 703.4817
anova(dur.g1, dur.g3, dur.g4)
## Likelihood ratio tests of Negative Binomial Models
##
## Response: square.dur
## Model theta Resid. df 2 x log-lik.
## 1 treatment 11.60760 40 -691.4817
## 2 whole.mean + alive + qro 16.31718 39 -675.9570
## 3 treatment + whole.mean + alive + qro 21.05744 35 -664.3947
## Test df LR stat. Pr(Chi)
## 1
## 2 1 vs 2 1 15.5247 8.143412e-05
## 3 2 vs 3 4 11.5623 2.092096e-02
qqnorm(resid(dur.g3));qqline(resid(dur.g3))
qqnorm(resid(dur.g1));qqline(resid(dur.g1)) # I almost feel like this one fits better
summary(dur.g1)
##
## Call:
## glm.nb(formula = square.dur ~ treatment + whole.mean + alive +
## qro, data = duration, init.theta = 21.05743915, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.89476 -0.75776 0.00199 0.60069 2.22508
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.48890 0.12691 59.012 < 2e-16 ***
## treatment2 -0.22563 0.10490 -2.151 0.03148 *
## treatment3 -0.14635 0.10716 -1.366 0.17200
## treatment4 -0.30842 0.10401 -2.965 0.00302 **
## treatment5 -0.33825 0.10672 -3.169 0.00153 **
## whole.mean -0.73649 0.23250 -3.168 0.00154 **
## alive 0.13810 0.02756 5.012 5.4e-07 ***
## qroB3 -0.28456 0.10963 -2.596 0.00944 **
## qroB4 -0.01014 0.12836 -0.079 0.93706
## qroB5 0.13285 0.08959 1.483 0.13813
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(21.0574) family taken to be 1)
##
## Null deviance: 91.425 on 44 degrees of freedom
## Residual deviance: 45.458 on 35 degrees of freedom
## AIC: 686.39
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 21.06
## Std. Err.: 4.47
##
## 2 x log-likelihood: -664.395
summary(dur.g3)
##
## Call:
## glm.nb(formula = square.dur ~ whole.mean + alive + qro, data = duration,
## init.theta = 16.31718118, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3506 -0.5620 -0.0943 0.4540 2.6766
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.38105 0.13375 55.184 < 2e-16 ***
## whole.mean -0.68794 0.25724 -2.674 0.007488 **
## alive 0.11240 0.02980 3.772 0.000162 ***
## qroB3 -0.26889 0.12412 -2.166 0.030287 *
## qroB4 -0.04101 0.14408 -0.285 0.775925
## qroB5 0.11116 0.10092 1.101 0.270697
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(16.3172) family taken to be 1)
##
## Null deviance: 71.078 on 44 degrees of freedom
## Residual deviance: 45.569 on 39 degrees of freedom
## AIC: 689.96
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 16.32
## Std. Err.: 3.45
##
## 2 x log-likelihood: -675.957
Anova(dur.g1)
## Analysis of Deviance Table (Type II tests)
##
## Response: square.dur
## LR Chisq Df Pr(>Chisq)
## treatment 13.1426 4 0.010600 *
## whole.mean 9.8316 1 0.001715 **
## alive 21.5601 1 3.429e-06 ***
## qro 10.8661 3 0.012472 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(dur.g3)
## Analysis of Deviance Table (Type II tests)
##
## Response: square.dur
## LR Chisq Df Pr(>Chisq)
## whole.mean 7.2474 1 0.0071004 **
## alive 13.2906 1 0.0002667 ***
## qro 7.1824 3 0.0663058 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# I am going to keep model dur.g1. I think that since whole.mean exlains deviation in the data for both models, it is more important to include treatment as our primary explanatory variable.
em.dur<- emmeans(dur.g1, "treatment")
pairs(em.dur)
## contrast estimate SE df z.ratio p.value
## treatment1 - treatment2 0.2256 0.105 Inf 2.151 0.1987
## treatment1 - treatment3 0.1464 0.107 Inf 1.366 0.6497
## treatment1 - treatment4 0.3084 0.104 Inf 2.965 0.0252
## treatment1 - treatment5 0.3383 0.107 Inf 3.169 0.0133
## treatment2 - treatment3 -0.0793 0.104 Inf -0.763 0.9410
## treatment2 - treatment4 0.0828 0.104 Inf 0.798 0.9314
## treatment2 - treatment5 0.1126 0.105 Inf 1.075 0.8197
## treatment3 - treatment4 0.1621 0.105 Inf 1.542 0.5353
## treatment3 - treatment5 0.1919 0.105 Inf 1.831 0.3556
## treatment4 - treatment5 0.0298 0.106 Inf 0.281 0.9986
##
## Results are averaged over the levels of: qro
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 5 estimates
plot(em.dur, comparisons = TRUE)
dur.sum <- duration %>%
group_by(treatment) %>%
summarise(cdur.m = mean(duration),
dur.sd = sd(duration),
dur.n = length(duration)) %>%
mutate(sedur = dur.sd/sqrt(dur.n))
dur.sum
## # A tibble: 5 × 5
## treatment cdur.m dur.sd dur.n sedur
## <fct> <dbl> <dbl> <int> <dbl>
## 1 1 45 7.21 9 2.40
## 2 2 42 3.16 9 1.05
## 3 3 44.6 5.08 9 1.69
## 4 4 39.3 4.87 9 1.62
## 5 5 41.4 9.96 9 3.32
durtuk.means <- emmeans(object = dur.g1,
specs = "treatment",
adjust = "Tukey",
type = "response")
durtuk.means
## treatment response SE df asymp.LCL asymp.UCL
## 1 2127 172 Inf 1727 2619
## 2 1697 133 Inf 1388 2075
## 3 1837 144 Inf 1503 2246
## 4 1562 125 Inf 1272 1919
## 5 1516 117 Inf 1245 1847
##
## Results are averaged over the levels of: qro
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## Intervals are back-transformed from the log scale
dur.cld.model <- cld(object = durtuk.means,
adjust = "Tukey",
Letters = letters,
alpha = 0.05)
dur.cld.model
## treatment response SE df asymp.LCL asymp.UCL .group
## 5 1516 117 Inf 1245 1847 a
## 4 1562 125 Inf 1272 1919 a
## 2 1697 133 Inf 1388 2075 ab
## 3 1837 144 Inf 1503 2246 ab
## 1 2127 172 Inf 1727 2619 b
##
## Results are averaged over the levels of: qro
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## Intervals are back-transformed from the log scale
## P value adjustment: tukey method for comparing a family of 5 estimates
## Tests are performed on the log scale
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
durtuk.means <- as.data.frame(durtuk.means)
dur_max <- duration %>%
group_by(treatment) %>%
summarize(maxdur = max((duration)))
dur_for_plotting <- full_join(durtuk.means, dur_max,
by="treatment")
dur_for_plotting$maxse <- dur_for_plotting$response + dur_for_plotting$SE
ggplot(data = dur.sum, aes(x = treatment, y = cdur.m, fill = treatment)) +
geom_col(col = "black")+
coord_cartesian(ylim=c(5, 53)) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
geom_errorbar(aes(ymin = cdur.m-sedur,
ymax = cdur.m+sedur),
position = position_dodge(0.9), width = 0.4) +
labs(y = "Days",) +
ggtitle("Average Colony Duration per Treatment") +
scale_x_discrete(name = "Treatment",
labels = c("0 PPB", "150 PPB", "1,500 PPB", "15,000 PPB", "150,000 PPB")) +
theme_cowplot()+
theme_classic(base_size = 30) +
theme(legend.position = "none") +
annotate(geom = "text",
x = 1, y = 51.5,
label = "P < 0.05",
size = 12) +
annotate(geom = "text",
x = c(5, 4, 2, 3, 1),
y = c(45.5, 41.7, 44, 47, 48.3),
label = c("a", "a", "ab", "ab", "b"),
size = 12) +
theme(legend.position = "none")
dur_for_plotting
## treatment response SE df asymp.LCL asymp.UCL maxdur maxse
## 1 2127 172 Inf 1727 2619 58 2299
## 2 1697 133 Inf 1388 2075 47 1830
## 3 1837 144 Inf 1503 2246 55 1981
## 4 1562 125 Inf 1272 1919 46 1687
## 5 1516 117 Inf 1245 1847 58 1633
##
## Results are averaged over the levels of: qro
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 5 estimates
## Intervals are back-transformed from the log scale
brood.for.merge <- subset(brood, select = c(colony, brood_cells, honey_pot, eggs, dead_larvae, live_larvae, dead_pupae, live_pupae, dead_drones, live_drones, drones))
merged_df <- merge(trunc, brood.for.merge, by = "colony", all = TRUE)
emerge <- subset(merged_df, select = c(emerge, treatment, whole.mean, alive, qro, colony, count, brood_cells, honey_pot, eggs, dead_larvae, live_larvae, dead_pupae, live_pupae, dead_drones, live_drones, drones))
ggplot(emerge, aes(x=emerge, fill = treatment)) +
geom_histogram(position = "identity", binwidth = 1 ,col=I("black")) +
scale_fill_manual(values = c("peachpuff3", "darkseagreen", "lightseagreen", "darkolivegreen3", "darkolivegreen4"),
name = "Pristine Level",
labels = c("Treatment 1 (control)", "Treatment 2",
"Treatment 3", "Treatment 4", "Treatment 5")) +
ggtitle("Days Until First Drone Emerged") +
labs(y = "Count", x = "Days")
library(stargazer)
library(censReg)
# Create a new variable indicating censoring
emerge$censored <- ifelse(is.na(emerge$emerge), 1, 0)
# Set censoring limits
emerge$lower_limit <- ifelse(is.na(emerge$emerge), 0, emerge$emerge)
emerge$upper_limit <- ifelse(is.na(emerge$emerge), Inf, emerge$emerge)
model <- censReg(lower_limit ~ treatment + whole.mean + qro + alive, data = emerge, left = 0, right = Inf)
stargazer(model, type = "text")
##
## ===============================================
## Dependent variable:
## ---------------------------
## lower_limit
## -----------------------------------------------
## treatment2 -2.316
## (3.466)
##
## treatment3 0.272
## (3.511)
##
## treatment4 -1.493
## (3.447)
##
## treatment5 -3.369
## (3.642)
##
## whole.mean -9.527
## (7.623)
##
## qroB3 -2.794
## (3.737)
##
## qroB4 3.949
## (4.328)
##
## qroB5 9.938***
## (2.978)
##
## alive 8.498***
## (1.040)
##
## logSigma 1.957***
## (0.115)
##
## Constant 0.257
## (4.785)
##
## -----------------------------------------------
## Observations 45
## Log Likelihood -139.010
## Akaike Inf. Crit. 300.019
## Bayesian Inf. Crit. 319.892
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
mod1 <- lm(lower_limit ~ treatment + whole.mean + qro + alive, data = emerge)
stargazer(model, mod1, type = "text")
##
## =====================================================
## Dependent variable:
## ---------------------------------
## lower_limit
## censored OLS
## regression
## (1) (2)
## -----------------------------------------------------
## treatment2 -2.316 -1.891
## (3.466) (3.501)
##
## treatment3 0.272 0.724
## (3.511) (3.577)
##
## treatment4 -1.493 -1.319
## (3.447) (3.470)
##
## treatment5 -3.369 -2.278
## (3.642) (3.560)
##
## whole.mean -9.527 -10.233
## (7.623) (7.755)
##
## qroB3 -2.794 -1.810
## (3.737) (3.652)
##
## qroB4 3.949 3.542
## (4.328) (4.278)
##
## qroB5 9.938*** 8.750***
## (2.978) (2.990)
##
## alive 8.498*** 7.379***
## (1.040) (0.918)
##
## logSigma 1.957***
## (0.115)
##
## Constant 0.257 5.596
## (4.785) (4.230)
##
## -----------------------------------------------------
## Observations 45 45
## R2 0.722
## Adjusted R2 0.651
## Log Likelihood -139.010
## Akaike Inf. Crit. 300.019
## Bayesian Inf. Crit. 319.892
## Residual Std. Error 7.314 (df = 35)
## F Statistic 10.110*** (df = 9; 35)
## =====================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
AIC(model)
## 'log Lik.' 300.019 (df=11)
AIC(mod1)
## [1] 317.4738