#Trying to find FTD is tricky for weeks 9 and 10, because week '9' was collected after 11 days, and week '10' was collected only 2 days after that
#We can't add '9' and '10' and call it week 10, because they were collected at different time intervals
#We can add flies caught on collection week '9' to '10', and divide the total by the total number of days elapsed between collection week '8' and '10' which is 13 days.
#Add weeks '9' and '10' flies together per trap
#Read in combined dataset
mp2 <- read_excel("MAO_multilurecollections_2024(combinedwk10).xlsx",
col_types = c("numeric", "numeric", "skip",
"numeric", "text", "text", "numeric",
"numeric", "numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric"))
## Warning: Expecting numeric in G34 / R34C7: got 'NA'
## Warning: Expecting numeric in H34 / R34C8: got 'NA'
## Warning: Expecting numeric in I34 / R34C9: got 'NA'
## Warning: Expecting numeric in J34 / R34C10: got 'NA'
## Warning: Expecting numeric in K34 / R34C11: got 'NA'
## Warning: Expecting numeric in L34 / R34C12: got 'NA'
## Warning: Expecting numeric in M34 / R34C13: got 'NA'
## Warning: Expecting numeric in N34 / R34C14: got 'NA'
## Warning: Expecting numeric in O34 / R34C15: got 'NA'
## Warning: Expecting numeric in P34 / R34C16: got 'NA'
mp2$`multilure #` <- as.factor(mp2$`multilure #`)
mp2<- mp2 %>% rename(trap = `multilure #`)
mp2 <-mp2 %>% rename(total = `total mel`)
mp2 <- mp2 %>% filter(!is.na(total))
melon2 <- mp2[, c("trap", "week", "days_out", "total")]
#NOw we can find the flies caught per trap daily, based on the collection interval
melon2$FTD <- melon2$total / melon2$days_out
#Finding number of flies caught per trap daily, given the time between collection periods
melon_summary <- melon2 %>%
group_by(week) %>%
summarise(
n=n(),
total_flies = sum(total, na.rm =TRUE),
mean=mean(total, na.rm = TRUE),
se=sd(total, na.rm = TRUE)/sqrt(n),
meanFTD= mean(FTD, na.rm = TRUE),
seFTD = sd(FTD, na.rm = TRUE)/sqrt(n))
#total flies captured
ggplot(melon_summary, aes(x = week, y = mean)) + geom_point()+ geom_line() +
labs(title = "Mean flies caught weekly",
x = "Week",
y = "Mean MF caught")+
scale_y_continuous(limits = c(0, (max(melon_summary$mean) + max(melon_summary$se)))) + scale_x_continuous(limits = c(1, 10), breaks = seq(1, 10, by = 1)) +
theme_clean()
###FTD whole plot bar graph:
melon_summary$week <- as.factor(melon_summary$week)
ggplot(melon_summary, aes(x=week, y=meanFTD)) +geom_bar(stat="identity",position="dodge")+
geom_errorbar(aes(ymin = meanFTD-seFTD, ymax = meanFTD+seFTD),
position = position_dodge(width = 0.9), width = 0.2)+
labs(title = "Average Flies Caught per Trap Daily- Tomato plot 2024",
x = "Week",
y = " Mean Flies per trap/per day (FTD)") + theme_clean()
ggplot(melon_summary, aes(x=week, y=mean)) +geom_bar(stat="identity",position="dodge")+
geom_errorbar(aes(ymin = mean-se, ymax = mean+se),
position = position_dodge(width = 0.9), width = 0.2)+
labs(title = "Average Flies Caught per Week- Tomato plot 2024",
x = "Week",
y = " Mean Flies per Week") + theme_clean()
library(lme4)
#Poisson has to be fed integers, so we can scale the count data by time to collect in the model, but we have to put the whole counts in the model.We do this by adding (offset(days_out)), except, Poisson makes a log model, so we want to take the log(offset(days_out)).
#Examples drawn from multiple tutorials to try to figure out how to make this model.
#From Statistical modeling in R 2017
#glmer() can handle repeated measures and gives us an AIC so we can compare model fits.
#Don't forget to set week as a factor, as a continuous variable it doesn't make a binned distribution.
melon2$week <- as.factor(melon2$week)
glmm <- glmer(total ~ week + (1 | trap) + offset(log(days_out)), data = melon2, family = poisson)
summary(glmm)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: total ~ week + (1 | trap) + offset(log(days_out))
## Data: melon2
##
## AIC BIC logLik deviance df.resid
## 961.1 983.8 -470.6 941.1 61
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.6291 -1.9141 -0.2859 1.3812 7.8239
##
## Random effects:
## Groups Name Variance Std.Dev.
## trap (Intercept) 0.3482 0.5901
## Number of obs: 71, groups: trap, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.46707 0.21205 11.634 < 2e-16 ***
## week2 -0.30062 0.05493 -5.473 4.42e-08 ***
## week3 -0.25813 0.05427 -4.757 1.97e-06 ***
## week4 -1.15905 0.06973 -16.622 < 2e-16 ***
## week5 -1.64911 0.10005 -16.484 < 2e-16 ***
## week6 -2.81369 0.16200 -17.368 < 2e-16 ***
## week7 -2.81997 0.14185 -19.880 < 2e-16 ***
## week8 -3.00606 0.17759 -16.927 < 2e-16 ***
## week10 -3.53809 0.15829 -22.352 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) week2 week3 week4 week5 week6 week7 week8
## week2 -0.110
## week3 -0.112 0.431
## week4 -0.087 0.335 0.339
## week5 -0.063 0.234 0.236 0.184
## week6 -0.037 0.144 0.146 0.114 0.079
## week7 -0.043 0.165 0.167 0.130 0.090 0.056
## week8 -0.034 0.132 0.133 0.104 0.072 0.045 0.051
## week10 -0.038 0.148 0.149 0.116 0.081 0.050 0.057 0.046
### AIC: 961.1
Anova(glmm)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: total
## Chisq Df Pr(>Chisq)
## week 1618.3 8 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#For poisson models, we should check for overdispersion. Overdispersion is when more variance is present than expected by the model. We pretty much know it will be overdispersed, because we didn't take into account treatment. We were just looking at the change over time. We asked, is there a change over time? And there is, but treatment is also a factor in change over time.
library(ggResidpanel)
resid_xpanel(glmm, type = "deviance")
#Overdispersed?
library(performance)
check_overdispersion(glmm)
## # Overdispersion test
##
## dispersion ratio = 9.812
## Pearson's Chi-Squared = 598.519
## p-value = < 0.001
## Overdispersion detected.
#It is of course, over dispersed and the AIC is high, the model fit isn't very good without the before and after treatment factor.
#include in the data again to include another factor called "treat" which is "0" before of "1" after treatment
melon3 <- mp2[, c("trap", "week", "days_out", "treat", "total")]
melon3$week <- as.factor(melon3$week)
melon3$treat <- as.factor(melon3$treat)
str(melon3)
## tibble [71 × 5] (S3: tbl_df/tbl/data.frame)
## $ trap : Factor w/ 8 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 1 2 ...
## $ week : Factor w/ 9 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 2 2 ...
## $ days_out: num [1:71] 7 7 7 7 7 7 7 7 7 7 ...
## $ treat : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ total : num [1:71] 234 19 223 145 67 21 35 34 112 27 ...
glmm1 <- glmer(total ~ week + treat + (1 | trap) + offset(log(days_out)), data = melon3, family = poisson)
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
summary(glmm1) #fixed-effect model matrix is rank deficient so dropping 8 columns / coefficients
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: total ~ week + treat + (1 | trap) + offset(log(days_out))
## Data: melon3
##
## AIC BIC logLik deviance df.resid
## 961.1 983.8 -470.6 941.1 61
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.6291 -1.9141 -0.2859 1.3812 7.8239
##
## Random effects:
## Groups Name Variance Std.Dev.
## trap (Intercept) 0.3482 0.5901
## Number of obs: 71, groups: trap, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.46707 0.21205 11.634 < 2e-16 ***
## week2 -0.30062 0.05493 -5.473 4.42e-08 ***
## week3 -0.25813 0.05427 -4.757 1.97e-06 ***
## week4 -1.15905 0.06973 -16.622 < 2e-16 ***
## week5 -1.64911 0.10005 -16.484 < 2e-16 ***
## week6 -2.81369 0.16200 -17.368 < 2e-16 ***
## week7 -2.81997 0.14185 -19.880 < 2e-16 ***
## week8 -3.00606 0.17759 -16.927 < 2e-16 ***
## week10 -3.53809 0.15829 -22.352 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) week2 week3 week4 week5 week6 week7 week8
## week2 -0.110
## week3 -0.112 0.431
## week4 -0.087 0.335 0.339
## week5 -0.063 0.234 0.236 0.184
## week6 -0.037 0.144 0.146 0.114 0.079
## week7 -0.043 0.165 0.167 0.130 0.090 0.056
## week8 -0.034 0.132 0.133 0.104 0.072 0.045 0.051
## week10 -0.038 0.148 0.149 0.116 0.081 0.050 0.057 0.046
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
#This "fixed-effect" error often occurs when factors are too highly correlated. Week and treat are correlated lol, So we can stick with just using week.
#Use a negative binomial model from MASS package
library(MASS)
glmm.nb <- glmer.nb(total ~ week + (1 | trap) + offset(log(days_out)), data = melon3)
summary(glmm.nb)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Negative Binomial(2.3121) ( log )
## Formula: total ~ week + (1 | trap) + offset(log(days_out))
## Data: melon3
##
## AIC BIC logLik deviance df.resid
## 566.4 591.3 -272.2 544.4 60
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3056 -0.7311 -0.1793 0.3989 3.2471
##
## Random effects:
## Groups Name Variance Std.Dev.
## trap (Intercept) 0.2552 0.5052
## Number of obs: 71, groups: trap, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.42696 0.30128 8.056 7.91e-16 ***
## week2 -0.22796 0.33612 -0.678 0.497641
## week3 -0.07975 0.34215 -0.233 0.815697
## week4 -1.13379 0.34061 -3.329 0.000873 ***
## week5 -1.59155 0.36740 -4.332 1.48e-05 ***
## week6 -2.86350 0.37718 -7.592 3.15e-14 ***
## week7 -2.68905 0.37260 -7.217 5.32e-13 ***
## week8 -2.94580 0.39435 -7.470 8.02e-14 ***
## week10 -3.45352 0.38176 -9.046 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) week2 week3 week4 week5 week6 week7 week8
## week2 -0.563
## week3 -0.578 0.496
## week4 -0.561 0.493 0.494
## week5 -0.541 0.469 0.483 0.471
## week6 -0.500 0.435 0.442 0.444 0.415
## week7 -0.533 0.452 0.473 0.462 0.443 0.415
## week8 -0.508 0.437 0.448 0.437 0.421 0.381 0.421
## week10 -0.520 0.449 0.460 0.445 0.428 0.393 0.430 0.433
### AIC: 566.4 better than the previous two model
#Check overdispersion
resid_xpanel(glmm, type = "deviance")
resid_xpanel(glmm, type = "pearson")
check_overdispersion(glmm.nb)
## # Overdispersion test
##
## dispersion ratio = 0.662
## p-value = 0.824
## No overdispersion detected.
#None detected we have a winner!
#Look at ANOVA again
Anova(glmm.nb)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: total
## Chisq Df Pr(>Chisq)
## week 203.54 8 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Step 1:Use emmeans to estimate marginal means of the 'week' variable
emm_week <- emmeans(glmm.nb, ~ week)
# View the results
summary(emm_week) #The estimated marginal means are the estimate for total, based on the individual traps present (mean/traps) given the offset (time inteval). So, its the estimated FTD!
## week emmean SE df asymp.LCL asymp.UCL
## 1 4.47 0.301 Inf 3.875 5.06
## 2 4.24 0.300 Inf 3.650 4.82
## 3 4.39 0.298 Inf 3.801 4.97
## 4 3.33 0.303 Inf 2.738 3.92
## 5 2.87 0.325 Inf 2.236 3.51
## 6 1.60 0.346 Inf 0.924 2.28
## 7 1.78 0.332 Inf 1.126 2.43
## 8 1.52 0.354 Inf 0.825 2.21
## 10 1.01 0.342 Inf 0.342 1.68
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
#Also,the model is predicting the logarithm of the expected count of total, not the raw count itself. Because the model is negative binomial which uses a logit scale.
plot(emm_week)
#Sidak adjustment is another method for controlling the family-wise error rate but is more appropriate when you’re dealing with multiple comparisons across multiple groups or comparisons with a more complex structure (like your model, which might involve random effects). The Sidak adjustment is slightly less conservative than Tukey's, but it still helps to control for Type I errors when making multiple comparisons.
pair_results <- cld(emm_week, alpha = 0.05, Letters = letters, adjust = "sidak")
# Step 3: Convert the CLD results to a data frame (if not already)
pair_df <- as.data.frame(pair_results)
str(pair_df)
## Classes 'summary_emm' and 'data.frame': 9 obs. of 7 variables:
## $ week : Factor w/ 9 levels "1","2","3","4",..: 9 8 6 7 5 4 2 3 1
## $ emmean : num 1.01 1.52 1.6 1.78 2.87 ...
## $ SE : num 0.342 0.354 0.346 0.332 0.325 ...
## $ df : num Inf Inf Inf Inf Inf ...
## $ asymp.LCL: num 0.0662 0.539 0.6456 0.8591 1.9734 ...
## $ asymp.UCL: num 1.96 2.5 2.56 2.69 3.77 ...
## $ .group : chr " a " " a " " ab " " ab " ...
## - attr(*, "estName")= chr "emmean"
## - attr(*, "clNames")= chr [1:2] "asymp.LCL" "asymp.UCL"
## - attr(*, "pri.vars")= chr "week"
## - attr(*, "adjust")= chr "sidak"
## - attr(*, "side")= num 0
## - attr(*, "delta")= num 0
## - attr(*, "type")= chr "link"
## - attr(*, "mesg")= chr [1:7] "Results are given on the log (not the response) scale." "Confidence level used: 0.95" "Conf-level adjustment: sidak method for 9 estimates" "Results are given on the log (not the response) scale." ...
## - attr(*, "linkname")= chr "log"
## - attr(*, "digits")= int 7
summary(pair_df)
## week emmean SE df asymp.LCL asymp.UCL .group
## 10 1.011541 0.3418284 Inf 0.066205 1.956878 a
## 8 1.519265 0.3544554 Inf 0.539008 2.499522 a
## 6 1.601570 0.3456681 Inf 0.645615 2.557525 ab
## 7 1.776020 0.3315549 Inf 0.859095 2.692945 ab
## 5 2.873515 0.3254934 Inf 1.973353 3.773677 bc
## 4 3.331276 0.3027500 Inf 2.494012 4.168540 cd
## 2 4.237107 0.2995478 Inf 3.408699 5.065516 de
## 3 4.385317 0.2979315 Inf 3.561379 5.209256 de
## 1 4.465066 0.3012766 Inf 3.631876 5.298255 e
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 9 estimates
## P value adjustment: sidak method for 36 tests
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
ggplot(pair_df, aes(x = interaction(week), # Week on x-axis
y = emmean, # Estimated marginal means on y-axis
color = .group, # Grouping based on CLD letters
group = .group)) + # Group by unique identifier for lines
# Add points for the means
geom_point(shape = 15, size = 4, position = position_dodge(0.5)) +
# Add error bars (for example, confidence intervals)
geom_errorbar(aes(ymin = asymp.LCL, ymax = asymp.UCL), width = 0.2, position = position_dodge(0.5)) +
# Set axis labels and plot title
labs(x = "Week", y = "log(Estimated Mean)", title = "Estimated Marginal Means for Week and mean flies per week") +
# Use a clean theme for the plot
theme_bw() +
# Rotate x-axis labels for better readability (optional)
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#Use a negative binomial model from MASS package
library(MASS)
glmm.nbt <- glmer.nb(total ~ treat + (1 | trap) + offset(log(days_out)), data = melon3)
summary(glmm.nbt)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Negative Binomial(0.6534) ( log )
## Formula: total ~ treat + (1 | trap) + offset(log(days_out))
## Data: melon3
##
## AIC BIC logLik deviance df.resid
## 626.2 635.2 -309.1 618.2 67
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.7962 -0.6963 -0.4279 0.2283 4.1537
##
## Random effects:
## Groups Name Variance Std.Dev.
## trap (Intercept) 0.03631 0.1905
## Number of obs: 71, groups: trap, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.4490 0.3434 7.132 9.87e-13 ***
## treat1 -1.4135 0.3598 -3.929 8.55e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## treat1 -0.844
### AIC: 568.8 is slightly worse than the week model
Anova(glmm.nbt)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: total
## Chisq Df Pr(>Chisq)
## treat 15.434 1 8.546e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check overdispersion
check_overdispersion(glmm.nbt)
## # Overdispersion test
##
## dispersion ratio = 0.603
## p-value = 0.664
## No overdispersion detected.
#Try to look at pairwise comparisons
## Step 1:Use emmeans to estimate marginal means of the 'week' variable
emm_week2 <- emmeans(glmm.nbt, ~ treat)
# View the results
summary(emm_week2)
## treat emmean SE df asymp.LCL asymp.UCL
## 0 4.49 0.343 Inf 3.81 5.16
## 1 3.07 0.197 Inf 2.69 3.46
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
plot(emm_week2)
# Step 2: Conduct post-hoc comparisons with Tukey adjustment and add groupings (letters)
pair_results2 <- cld(emm_week2, alpha = 0.05, Letters = letters, adjust = "sidak")
# Step 3: Convert the CLD results to a data frame (if not already)
pair_df2 <- as.data.frame(pair_results2)
str(pair_df2)
## Classes 'summary_emm' and 'data.frame': 2 obs. of 7 variables:
## $ treat : Factor w/ 2 levels "0","1": 2 1
## $ emmean : num 3.07 4.49
## $ SE : num 0.197 0.343
## $ df : num Inf Inf
## $ asymp.LCL: num 2.63 3.72
## $ asymp.UCL: num 3.51 5.26
## $ .group : chr " a " " b"
## - attr(*, "estName")= chr "emmean"
## - attr(*, "clNames")= chr [1:2] "asymp.LCL" "asymp.UCL"
## - attr(*, "pri.vars")= chr "treat"
## - attr(*, "adjust")= chr "sidak"
## - attr(*, "side")= num 0
## - attr(*, "delta")= num 0
## - attr(*, "type")= chr "link"
## - attr(*, "mesg")= chr [1:6] "Results are given on the log (not the response) scale." "Confidence level used: 0.95" "Conf-level adjustment: sidak method for 2 estimates" "Results are given on the log (not the response) scale." ...
## - attr(*, "linkname")= chr "log"
## - attr(*, "digits")= int 7
summary(pair_df2)
## treat emmean SE df asymp.LCL asymp.UCL .group
## 1 3.073625 0.1973065 Inf 2.632354 3.514897 a
## 0 4.487108 0.3433704 Inf 3.719168 5.255048 b
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 2 estimates
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
# Step 4: Create a ggplot to visualize the estimated marginal means and groupings
# Make sure the data frame contains the necessary variables: 'Instruction', 'Month', 'estimate', 'cld', 'group'
ggplot(pair_df2, aes(x = interaction(treat), # Week on x-axis
y = emmean, # Estimated marginal means on y-axis
color = .group, # Grouping based on CLD letters
group = .group)) + # Group by unique identifier for lines
# Add points for the means
geom_point(shape = 15, size = 4, position = position_dodge(0.5)) +
# Add error bars (for example, confidence intervals)
geom_errorbar(aes(ymin = asymp.LCL, ymax = asymp.UCL), width = 0.2, position = position_dodge(0.5)) +
# Set axis labels and plot title
labs(x = "Week", y = "log(Estimated Mean)", title = "Estimated Marginal Means for treatment on mean flies per week") +
# Use a clean theme for the plot
theme_bw() +
# Rotate x-axis labels for better readability (optional)
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#Model to compare proportion of female to male flies caught over time.
str(mp2)
## tibble [71 × 15] (S3: tbl_df/tbl/data.frame)
## $ trap : Factor w/ 8 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 1 2 ...
## $ week : num [1:71] 1 1 1 1 1 1 1 1 2 2 ...
## $ days_out : num [1:71] 7 7 7 7 7 7 7 7 7 7 ...
## $ treat : chr [1:71] "0" "0" "0" "0" ...
## $ block : chr [1:71] "A" "A" "A" "A" ...
## $ m_mel : num [1:71] 55 9 68 72 24 4 12 24 42 14 ...
## $ f_mel : num [1:71] 65 10 48 73 43 17 23 10 70 13 ...
## $ xy_mel : num [1:71] 114 0 107 0 0 0 0 0 0 0 ...
## $ total : num [1:71] 234 19 223 145 67 21 35 34 112 27 ...
## $ m_ori : num [1:71] 2 1 2 0 0 0 0 0 2 1 ...
## $ f_ori : num [1:71] 2 2 2 1 3 0 0 0 2 4 ...
## $ total ori: num [1:71] 4 3 4 1 3 0 0 0 4 5 ...
## $ m_med : num [1:71] 0 0 0 0 0 0 0 0 0 0 ...
## $ f_med : num [1:71] 0 0 0 0 0 0 0 0 0 0 ...
## $ total med: num [1:71] 0 0 0 0 0 0 0 0 0 0 ...
melon4 <- mp2[, c("trap", "week", "days_out", "treat", "m_mel", "f_mel")]
melon4$treat <- as.factor(melon4$treat)
str(melon4)
## tibble [71 × 6] (S3: tbl_df/tbl/data.frame)
## $ trap : Factor w/ 8 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 1 2 ...
## $ week : num [1:71] 1 1 1 1 1 1 1 1 2 2 ...
## $ days_out: num [1:71] 7 7 7 7 7 7 7 7 7 7 ...
## $ treat : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ m_mel : num [1:71] 55 9 68 72 24 4 12 24 42 14 ...
## $ f_mel : num [1:71] 65 10 48 73 43 17 23 10 70 13 ...
#Add in proportion variables
melon4$sum <- melon4$f_mel + melon4$m_mel
melon4$propf <- melon4$f_mel/melon4$sum
melon4$propm <- melon4$m_mel/melon4$sum
melon4$week <- as.factor(melon4$week)
str(melon4)
## tibble [71 × 9] (S3: tbl_df/tbl/data.frame)
## $ trap : Factor w/ 8 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 1 2 ...
## $ week : Factor w/ 9 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 2 2 ...
## $ days_out: num [1:71] 7 7 7 7 7 7 7 7 7 7 ...
## $ treat : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ m_mel : num [1:71] 55 9 68 72 24 4 12 24 42 14 ...
## $ f_mel : num [1:71] 65 10 48 73 43 17 23 10 70 13 ...
## $ sum : num [1:71] 120 19 116 145 67 21 35 34 112 27 ...
## $ propf : num [1:71] 0.542 0.526 0.414 0.503 0.642 ...
## $ propm : num [1:71] 0.458 0.474 0.586 0.497 0.358 ...
#Visualize data
# Summarize data by week
melon_summary4 <- melon4 %>%
group_by(week) %>%
summarize(
total_flies = sum(sum), # Total count of flies for each week
total_males = sum(m_mel), # Total count of males for each week
total_females = sum(f_mel) # Total count of females for each week
) %>%
mutate(
prop_males = total_males / total_flies, # Proportion of males
prop_females = total_females / total_flies # Proportion of females
)
# Create the stacked pyramid plot
ggplot(melon_summary4, aes(x = week)) +
geom_bar(aes(y = -prop_females, fill = "Female"), stat = "identity", width = 0.6) +
geom_bar(aes(y = prop_males, fill = "Male"), stat = "identity", width = 0.6) +
geom_text(aes(y = 0, label = paste("n =", total_flies)), vjust = 0.5, size = 5) +
# Flip the axes for the pyramid effect
coord_flip() +
labs(x = "Week", y = "Proportion of Flies",
title = "Proportion of Male and Female Flies by Week") +
scale_fill_manual(values = c("Male" = "lightblue", "Female" = "pink")) +
theme_clean() +
theme(legend.position = "bottom")
#We feed the model a matrix of number of males and number of females coutned per trap per week.
#We still have the offset term, and the trap as the random effect
glmm.sex <- glmer(cbind(m_mel, f_mel) ~ week + (1 | trap) + offset(log(days_out)), data = melon4, family = binomial)
summary(glmm.sex)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(m_mel, f_mel) ~ week + (1 | trap) + offset(log(days_out))
## Data: melon4
##
## AIC BIC logLik deviance df.resid
## 386.4 409.0 -183.2 366.4 61
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.583 -1.206 0.000 0.949 3.025
##
## Random effects:
## Groups Name Variance Std.Dev.
## trap (Intercept) 0.142 0.3769
## Number of obs: 71, groups: trap, 8
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.01823 0.16147 -12.499 < 2e-16 ***
## week2 0.03109 0.12164 0.256 0.798287
## week3 0.61910 0.12374 5.003 5.64e-07 ***
## week4 -0.03556 0.14927 -0.238 0.811683
## week5 0.86604 0.22522 3.845 0.000120 ***
## week6 0.55302 0.33579 1.647 0.099576 .
## week7 1.04833 0.34343 3.052 0.002270 **
## week8 1.64326 0.46741 3.516 0.000439 ***
## week10 0.84636 0.41126 2.058 0.039593 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) week2 week3 week4 week5 week6 week7 week8
## week2 -0.381
## week3 -0.398 0.486
## week4 -0.311 0.411 0.404
## week5 -0.219 0.272 0.278 0.231
## week6 -0.127 0.167 0.175 0.146 0.096
## week7 -0.145 0.170 0.184 0.150 0.102 0.064
## week8 -0.110 0.137 0.140 0.110 0.077 0.039 0.053
## week10 -0.124 0.153 0.157 0.121 0.084 0.047 0.058 0.056
### AIC: 386.4
Anova(glmm.sex)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: cbind(m_mel, f_mel)
## Chisq Df Pr(>Chisq)
## week 62.797 8 1.314e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check overdispersion
resid_xpanel(glmm.sex, type = "deviance")
resid_xpanel(glmm.sex, type = "pearson")
check_overdispersion(glmm.sex)
## # Overdispersion test
##
## dispersion ratio = 1.045
## p-value = 0.776
## No overdispersion detected.
#Overdispersion was not detected
## Step 1:Use emmeans to estimate marginal means of the 'week' variable
emm_week_sex <- emmeans(glmm.sex, ~ week)
# View the results
summary(emm_week_sex)#The estimated marginal means are the estimate for total, based on the individual traps present (mean/traps) given the offset (time inteval). So, its the estimated FTD!
## week emmean SE df asymp.LCL asymp.UCL
## 1 0.0199 0.161 Inf -0.2966 0.336
## 2 0.0510 0.161 Inf -0.2646 0.366
## 3 0.6390 0.160 Inf 0.3261 0.952
## 4 -0.0157 0.183 Inf -0.3737 0.342
## 5 0.8859 0.247 Inf 0.4024 1.369
## 6 0.5729 0.354 Inf -0.1202 1.266
## 7 1.0682 0.358 Inf 0.3671 1.769
## 8 1.6631 0.477 Inf 0.7274 2.599
## 10 0.8662 0.423 Inf 0.0376 1.695
##
## Results are given on the logit (not the response) scale.
## Confidence level used: 0.95
#Also,the model is predicting the logarithm of the expected count of total, not the raw count itself. Because the model is negative binomial which uses a logit scale.
plot(emm_week_sex)
###NOtice the smaller the sample size the larger the variation
#Sidak adjustment is another method for controlling the family-wise error rate but is more appropriate when you’re dealing with multiple comparisons across multiple groups or comparisons with a more complex structure (like your model, which might involve random effects). The Sidak adjustment is slightly less conservative than Tukey's, but it still helps to control for Type I errors when making multiple comparisons.
pair_results <- cld(emm_week_sex, alpha = 0.05, Letters = letters, adjust = "sidak")
# Step 3: Convert the CLD results to a data frame (if not already)
pair_df_sex <- as.data.frame(pair_results)
summary(pair_df_sex)
## week emmean SE df asymp.LCL asymp.UCL .group
## 4 -0.0156839 0.1826714 Inf -0.5208672 0.4894993 a
## 1 0.0198806 0.1614702 Inf -0.4266700 0.4664311 a
## 2 0.0509665 0.1609849 Inf -0.3942419 0.4961749 a
## 6 0.5729018 0.3536137 Inf -0.4050272 1.5508309 ab
## 3 0.6389757 0.1596426 Inf 0.1974794 1.0804719 b
## 10 0.8662363 0.4227614 Inf -0.3029229 2.0353954 ab
## 5 0.8859169 0.2466880 Inf 0.2036938 1.5681400 b
## 7 1.0682074 0.3577396 Inf 0.0788679 2.0575469 ab
## 8 1.6631369 0.4774456 Inf 0.3427470 2.9835268 b
##
## Results are given on the logit (not the response) scale.
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 9 estimates
## Results are given on the log odds ratio (not the response) scale.
## P value adjustment: sidak method for 36 tests
## significance level used: alpha = 0.05
## NOTE: If two or more means share the same grouping symbol,
## then we cannot show them to be different.
## But we also did not show them to be the same.
ggplot(pair_df_sex, aes(x = interaction(week), # Week on x-axis
y = emmean, # Estimated marginal means on y-axis
color = .group, # Grouping based on CLD letters
group = .group)) + # Group by unique identifier for lines
# Add points for the means
geom_point(shape = 15, size = 4, position = position_dodge(0.5)) +
# Add error bars (for example, confidence intervals)
geom_errorbar(aes(ymin = asymp.LCL, ymax = asymp.UCL), width = 0.2, position = position_dodge(0.5)) +
# Set axis labels and plot title
labs(x = "Week", y = "log(Proportion of males to females)", title = "Estimated Marginal Means for difference in sex proportion") +
# Use a clean theme for the plot
theme_bw() +
# Rotate x-axis labels for better readability (optional)
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#Model to compare proportion of female to male flies caught over time.
str(mp2)
## tibble [71 × 15] (S3: tbl_df/tbl/data.frame)
## $ trap : Factor w/ 8 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 1 2 ...
## $ week : num [1:71] 1 1 1 1 1 1 1 1 2 2 ...
## $ days_out : num [1:71] 7 7 7 7 7 7 7 7 7 7 ...
## $ treat : chr [1:71] "0" "0" "0" "0" ...
## $ block : chr [1:71] "A" "A" "A" "A" ...
## $ m_mel : num [1:71] 55 9 68 72 24 4 12 24 42 14 ...
## $ f_mel : num [1:71] 65 10 48 73 43 17 23 10 70 13 ...
## $ xy_mel : num [1:71] 114 0 107 0 0 0 0 0 0 0 ...
## $ total : num [1:71] 234 19 223 145 67 21 35 34 112 27 ...
## $ m_ori : num [1:71] 2 1 2 0 0 0 0 0 2 1 ...
## $ f_ori : num [1:71] 2 2 2 1 3 0 0 0 2 4 ...
## $ total ori: num [1:71] 4 3 4 1 3 0 0 0 4 5 ...
## $ m_med : num [1:71] 0 0 0 0 0 0 0 0 0 0 ...
## $ f_med : num [1:71] 0 0 0 0 0 0 0 0 0 0 ...
## $ total med: num [1:71] 0 0 0 0 0 0 0 0 0 0 ...
ori <- mp2[, c("trap", "week", "days_out", "treat", "m_ori", "f_ori")]
ori$treat <- as.factor(ori$treat)
ori$week <- as.factor(ori$week)
ori$sum <- ori$f_ori + ori$m_ori
str(ori)
## tibble [71 × 7] (S3: tbl_df/tbl/data.frame)
## $ trap : Factor w/ 8 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 1 2 ...
## $ week : Factor w/ 9 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 2 2 ...
## $ days_out: num [1:71] 7 7 7 7 7 7 7 7 7 7 ...
## $ treat : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ m_ori : num [1:71] 2 1 2 0 0 0 0 0 2 1 ...
## $ f_ori : num [1:71] 2 2 2 1 3 0 0 0 2 4 ...
## $ sum : num [1:71] 4 3 4 1 3 0 0 0 4 5 ...
#Visualize data
# Summarize data by week
ori_summary <- ori %>%
group_by(week) %>%
summarize(
total_flies = sum(sum), # Total count of flies for each week
total_males = sum(m_ori), # Total count of males for each week
total_females = sum(f_ori) # Total count of females for each week
) %>%
mutate(
prop_males = total_males / total_flies, # Proportion of males
prop_females = total_females / total_flies # Proportion of females
)
# Create the stacked pyramid plot
ggplot(ori_summary, aes(x = week)) +
geom_bar(aes(y = -total_females, fill = "Female"), stat = "identity", width = 0.6) +
geom_bar(aes(y = total_males, fill = "Male"), stat = "identity", width = 0.6) +
geom_text(aes(y = 0, label = paste("n =", total_flies)), vjust = 0.5, size = 5) +
# Flip the axes for the pyramid effect
coord_flip() +
labs(x = "Week", y = "Count",
title = "Proportion of Male and Female Flies by Week") +
scale_fill_manual(values = c("Male" = "lightblue", "Female" = "pink")) +
theme_clean() +
theme(legend.position = "bottom")