load packages

library(ggplot2)
library(lme4)
## Warning: package 'lme4' was built under R version 4.4.2
## Loading required package: Matrix
library(lmerTest)
## Warning: package 'lmerTest' was built under R version 4.4.2
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(car) # for anova
## Warning: package 'car' was built under R version 4.4.2
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

set seed

set.seed(123)
# Get US map data
us_states <- map_data("state")

# Define colors for each highlighted state
us_states$highlight <- "normal"  # Default as normal (white)
us_states$highlight[us_states$region == "colorado"] <- "colorado"
us_states$highlight[us_states$region == "maryland"] <- "maryland"
us_states$highlight[us_states$region == "massachusetts"] <- "massachusetts"

# Define color mapping
state_colors <- c("colorado" = "orange", 
                  "maryland" = "mediumseagreen",  # Bluish green
                  "massachusetts" = "skyblue", 
                  "normal" = "white")

# Plot the map
ggplot(us_states, aes(x = long, y = lat, group = group, fill = highlight)) +
  geom_polygon(color = "black", size = 0.3) +  # Black borders for states
  scale_fill_manual(values = state_colors) +  # Set custom colors
  coord_fixed(1.3) +  # Keep aspect ratio correct
  theme_classic() +  # Apply classic theme
  theme(legend.position = "none",  # Remove legend
        axis.text = element_blank(), 
        axis.ticks = element_blank(), 
        axis.title = element_blank()) +
  labs(title = "US Map Highlighting Colorado, Maryland, and Massachusetts")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

set working directory

setwd("C:\\Users\\Owner\\OneDrive\\Home page photos\\Documents\\Manuscripts\\Hybrid FWW")

#upload

diapause <- read.csv("C:\\Users\\Owner\\OneDrive\\Home page photos\\Documents\\Manuscripts\\Hybrid FWW\\Ryan_FW Diapause summer 2024 - cleaned.csv")
head(diapause)
##   Year No. Mat.line Sex Color Location Host extraction.date sticker.color
## 1   24   6        1   M     R       CO   BC          24-May             5
## 2   24   5        1   M     R       CO   BC          24-May             5
## 3   24   4        1   M     R       CO   BC          24-May             5
## 4   24   4        1   F     R       CO  Mal          20-May             3
## 5   24   1        1   M     R       CO   BC          24-May             5
## 6   24   2        1   M     R       CO   BC          24-May             5
##   Emergence.date J_extract J_emergence diapuse_time Notes
## 1         17-Jun       145         169           24      
## 2         14-Jun       145         165           20      
## 3         13-Jun       145         165           20      
## 4         13-Jun       141         165           24      
## 5         12-Jun       145         164           19      
## 6         12-Jun       145         164           19
str(diapause)
## 'data.frame':    1916 obs. of  14 variables:
##  $ Year           : int  24 24 24 24 24 24 24 24 24 24 ...
##  $ No.            : int  6 5 4 4 1 2 3 2 3 1 ...
##  $ Mat.line       : chr  "1" "1" "1" "1" ...
##  $ Sex            : chr  "M" "M" "M" "F" ...
##  $ Color          : chr  "R" "R" "R" "R" ...
##  $ Location       : chr  "CO" "CO" "CO" "CO" ...
##  $ Host           : chr  "BC" "BC" "BC" "Mal" ...
##  $ extraction.date: chr  "24-May" "24-May" "24-May" "20-May" ...
##  $ sticker.color  : int  5 5 5 3 5 5 5 3 3 3 ...
##  $ Emergence.date : chr  "17-Jun" "14-Jun" "13-Jun" "13-Jun" ...
##  $ J_extract      : int  145 145 145 141 145 145 145 141 141 141 ...
##  $ J_emergence    : int  169 165 165 165 164 164 164 162 162 159 ...
##  $ diapuse_time   : chr  "24" "20" "20" "24" ...
##  $ Notes          : chr  "" "" "" "" ...

Code for Standard error

SE<- function(x) sqrt(var(x)/length(x))
diapause$diapuse_time<-as.integer(diapause$diapuse_time) #changing column to interger
## Warning: NAs introduced by coercion
#diapause<-diapause %>% drop_na(diapuse_time) # removing NA

diapause<-diapause[!is.na(diapause$diapuse_time), ] # removing NA

mean(diapause$diapuse_time)
## [1] 19.58021

subset DC

DC<-subset(diapause, Location == "DC")

subset by color and sex

DC.red<-subset(DC, Color == "R")
DC.red.M<-subset(DC.red, Sex == "M")
DC.red.F<-subset(DC.red, Sex == "F")

DC.black<-subset(DC, Color == "B")
DC.black.M<-subset(DC.black, Sex == "M")
DC.black.F<-subset(DC.black, Sex == "F")

Mean and Standard error

Red

print("DC Red -- mean, SE, N")
## [1] "DC Red -- mean, SE, N"
mean(DC.red$diapuse_time)
## [1] 30.93443
SE(DC.red$diapuse_time)
## [1] 1.168079
length(DC.red$diapuse_time)
## [1] 61
print("**************************************")
## [1] "**************************************"
print("DC Red male-- mean, SE, N")
## [1] "DC Red male-- mean, SE, N"
mean(DC.red.M$diapuse_time)
## [1] 26.27273
SE(DC.red.M$diapuse_time)
## [1] 2.000413
length(DC.red.M$diapuse_time)
## [1] 11
print("**************************************")
## [1] "**************************************"
print("DC Red female-- mean, SE, N")
## [1] "DC Red female-- mean, SE, N"
mean(DC.red.F$diapuse_time)
## [1] 31.96
SE(DC.red.F$diapuse_time)
## [1] 1.318923
length(DC.red.F$diapuse_time)
## [1] 50
print("**************************************")
## [1] "**************************************"
print("DC Black male-- mean, SE, N")
## [1] "DC Black male-- mean, SE, N"
mean(DC.black.M$diapuse_time)
## [1] 16.23926
SE(DC.black.M$diapuse_time)
## [1] 0.1278568
length(DC.black.M$diapuse_time)
## [1] 163
print("**************************************")
## [1] "**************************************"
print("DC Black female-- mean, SE, N")
## [1] "DC Black female-- mean, SE, N"
mean(DC.black.F$diapuse_time)
## [1] 16.875
SE(DC.black.F$diapuse_time)
## [1] 0.2946484
length(DC.black.F$diapuse_time)
## [1] 144

subset CO

CO<-subset(diapause, Location == "CO")

subset by sex

# all CO are red
CO.red.M<-subset(CO, Sex == "M")
CO.red.F<-subset(CO, Sex == "F")

Mean and Standard error

Red

print("CO Red -- mean, SE, N")
## [1] "CO Red -- mean, SE, N"
mean(CO$diapuse_time)
## [1] 22.375
SE(CO$diapuse_time)
## [1] 0.2179168
length(CO$diapuse_time)
## [1] 256
print("**************************************")
## [1] "**************************************"
print("CO Red male-- mean, SE, N")
## [1] "CO Red male-- mean, SE, N"
mean(CO.red.M$diapuse_time)
## [1] 21.48718
SE(CO.red.M$diapuse_time)
## [1] 0.2266555
length(CO.red.M$diapuse_time)
## [1] 117
print("**************************************")
## [1] "**************************************"
print("CO Red female-- mean, SE, N")
## [1] "CO Red female-- mean, SE, N"
mean(CO.red.F$diapuse_time)
## [1] 23.1223
SE(CO.red.F$diapuse_time)
## [1] 0.3411848
length(CO.red.F$diapuse_time)
## [1] 139

subset MA

MA<-subset(diapause, Location == "MA")

subset by color and sex

MA.red<-subset(MA, Color == "R")
MA.red.M<-subset(MA.red, Sex == "M")
MA.red.F<-subset(MA.red, Sex == "F")

MA.black<-subset(MA, Color == "B")
MA.black.M<-subset(MA.black, Sex == "M")
MA.black.F<-subset(MA.black, Sex == "F")

Mean and Standard error

Red

print("MA Red -- mean, SE, N")
## [1] "MA Red -- mean, SE, N"
mean(MA.red$diapuse_time)
## [1] NaN
SE(MA.red$diapuse_time)
## [1] NA
length(MA.red$diapuse_time)
## [1] 0
print("**************************************")
## [1] "**************************************"
print("MA Red male-- mean, SE, N")
## [1] "MA Red male-- mean, SE, N"
mean(MA.red.M$diapuse_time)
## [1] NaN
SE(MA.red.M$diapuse_time)
## [1] NA
length(MA.red.M$diapuse_time)
## [1] 0
print("**************************************")
## [1] "**************************************"
print("MA Red female-- mean, SE, N")
## [1] "MA Red female-- mean, SE, N"
mean(MA.red.F$diapuse_time)
## [1] NaN
SE(MA.red.F$diapuse_time)
## [1] NA
length(MA.red.F$diapuse_time)
## [1] 0
print("**************************************")
## [1] "**************************************"
print("MA Black male-- mean, SE, N")
## [1] "MA Black male-- mean, SE, N"
mean(MA.black.M$diapuse_time)
## [1] 16.02564
SE(MA.black.M$diapuse_time)
## [1] 0.176526
length(MA.black.M$diapuse_time)
## [1] 117
print("**************************************")
## [1] "**************************************"
print("MA Black female-- mean, SE, N")
## [1] "MA Black female-- mean, SE, N"
mean(MA.black.F$diapuse_time)
## [1] 17.13934
SE(MA.black.F$diapuse_time)
## [1] 0.2965994
length(MA.black.F$diapuse_time)
## [1] 122

Box plot for DC diapause

ggplot(DC, aes(x = Sex, y = diapuse_time, fill = Color)) +
  geom_boxplot() +
  scale_fill_manual(values = c( "grey", "brown3")) +  # Manually setting colors
  theme_classic() + 
  labs(title = "Diapause Emergence Times for sympatric DC Population by Color and Sex",
       x = "Color",
       y = "diapause_time")

Anova for Diapause differences in DC

anova_model.DC <- aov(diapuse_time ~ Color * Sex, data = DC)
summary(anova_model.DC)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Color         1  10548   10548 554.705 <2e-16 ***
## Sex           1    117     117   6.139 0.0137 *  
## Color:Sex     1    206     206  10.823 0.0011 ** 
## Residuals   364   6922      19                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Post-hoc Test (If Interaction is Significant)

if (summary(anova_model.DC)[[1]][["Pr(>F)"]][3] < 0.05) {
  TukeyHSD(anova_model.DC)
}
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = diapuse_time ~ Color * Sex, data = DC)
## 
## $Color
##         diff      lwr      upr p adj
## R-B 14.39697 13.19488 15.59905     0
## 
## $Sex
##          diff       lwr        upr   p adj
## M-F -1.088918 -1.984268 -0.1935676 0.01728
## 
## $`Color:Sex`
##                diff        lwr         upr     p adj
## R:F-B:F  15.0850000  13.237595  16.9324054 0.0000000
## B:M-B:F  -0.6357362  -1.922864   0.6513918 0.5796077
## R:M-B:F   9.3977273   5.877134  12.9183206 0.0000000
## B:M-R:F -15.7207362 -17.540180 -13.9012926 0.0000000
## R:M-R:F  -5.6872727  -9.435373  -1.9391727 0.0006191
## R:M-B:M  10.0334635   6.527462  13.5394648 0.0000000

Model Assumptions Check

Normality of Residuals

par(mfrow = c(1, 2))
hist(residuals(anova_model.DC), main = "Histogram of Residuals", xlab = "Residuals")
qqnorm(residuals(anova_model.DC))
qqline(residuals(anova_model.DC), col = "red")

Homogeneity of Variance

plot(anova_model.DC, which = 1)

leveneTest(diapuse_time ~ Color * Sex, data = DC)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3  48.715 < 2.2e-16 ***
##       364                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

subset reds

red<- subset(diapause, Color == "R")
black<-subset(diapause, Color == "B")
hybrid<-subset(diapause, Color == "H")

subset by sex

Female<-diapause[diapause$Sex != "F", ]
Male<-diapause[diapause$Sex != "M",]

*****************************************************

Reds

*****************************************************

Box plot for allopatric reds diapause emergence times

ggplot(red, aes(x = Sex, y = diapuse_time, fill = Location)) +
  geom_boxplot() +
  scale_fill_manual(values = c("CO" = "orange", "DC" = "mediumseagreen")) +  # Ensure colors match Location values
  theme_classic() + 
  theme(
   panel.background = element_rect(fill = "lightpink1", color = NA),  # Change panel background to light pink
    plot.background = element_rect(fill = "lightpink1", color = NA)    # Change entire plot background to light pink
  ) +
  labs(title = "Diapause Emergence Times for Allopatric Red Population (CO & DC) by Location and Sex",
       x = "Sex",
       y = "Diapause Time")

Anova for Diapause differences in Allopatric reds

anova_model.red <- aov(diapuse_time ~ Location * Sex, data = red)
summary(anova_model.red)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## Location       1   3609    3609 148.010  < 2e-16 ***
## Sex            1    332     332  13.609 0.000265 ***
## Location:Sex   1    130     130   5.317 0.021773 *  
## Residuals    313   7632      24                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Post-hoc Test (If Interaction is Significant)

if (summary(anova_model.red)[[1]][["Pr(>F)"]][3] < 0.05) {
  TukeyHSD(anova_model.red)
}
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = diapuse_time ~ Location * Sex, data = red)
## 
## $Location
##           diff      lwr      upr p adj
## DC-CO 8.559426 7.175128 9.943724     0
## 
## $Sex
##          diff       lwr        upr     p adj
## M-F -2.033057 -3.145246 -0.9208682 0.0003744
## 
## $`Location:Sex`
##                 diff         lwr         upr     p adj
## DC:F-CO:F   8.837698   6.7344141 10.94098156 0.0000000
## CO:M-CO:F  -1.635123  -3.2353402 -0.03490518 0.0430909
## DC:M-CO:F   3.150425  -0.8444320  7.14528220 0.1767219
## CO:M-DC:F -10.472821 -12.6277827 -8.31785828 0.0000000
## DC:M-DC:F  -5.687273  -9.9348661 -1.43967937 0.0034481
## DC:M-CO:M   4.785548   0.7632421  8.80785346 0.0122878

Model Assumptions Check

Normality of Residuals

par(mfrow = c(1, 2))
hist(residuals(anova_model.red), main = "Histogram of Residuals", xlab = "Residuals")
qqnorm(residuals(anova_model.red))
qqline(residuals(anova_model.red), col = "red")

Homogeneity of Variance

plot(anova_model.red, which = 1)

leveneTest(diapuse_time ~ Location * Sex, data = red)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value    Pr(>F)    
## group   3  33.422 < 2.2e-16 ***
##       313                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

*****************************************************

black

*****************************************************

table(red$Location, red$Sex)
##     
##        F   M
##   CO 139 117
##   DC  50  11

remove extra data

black <- black %>%
  mutate(Location = trimws(Location)) %>%  # Remove leading/trailing spaces
  filter(Location %in% c("MA", "DC")) %>%  # Keep only valid values
  droplevels()  # Drop any unused factor levels

Box plot for allopatric black diapause emergence times

ggplot(black, aes(x = Sex, y = diapuse_time, fill = Location)) +
  geom_boxplot() +
  scale_fill_manual(values = c("MA" = "skyblue", "DC" = "mediumseagreen")) +  # Ensure colors match Location values
  theme_classic() + 
  theme(
    panel.background = element_rect(fill = "grey70", color = NA),  # Change panel background to light pink
    plot.background = element_rect(fill = "grey70", color = NA)    # Change entire plot background to light pink
  ) +
  labs(title = "Diapause Emergence Times for Allopatric black Population (MA & DC) by Location and Sex",
       x = "Sex",
       y = "Diapause Time")

Anova for Diapause differences in Allopatric reds

anova_model.black <- aov(diapuse_time ~ Location * Sex, data = black)
summary(anova_model.black)
##               Df Sum Sq Mean Sq F value   Pr(>F)    
## Location       1      1    1.01   0.139 0.708933    
## Sex            1    104  103.64  14.372 0.000167 ***
## Location:Sex   1      6    6.36   0.882 0.348036    
## Residuals    548   3952    7.21                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Post-hoc Test (If Interaction is Significant)

if (summary(anova_model.black)[[1]][["Pr(>F)"]][3] < 0.05) {
  TukeyHSD(anova_model.black)
}

Model Assumptions Check

Normality of Residuals

par(mfrow = c(1, 2))
hist(residuals(anova_model.black), main = "Histogram of Residuals", xlab = "Residuals")
qqnorm(residuals(anova_model.black))
qqline(residuals(anova_model.black), col = "red")

Homogeneity of Variance

plot(anova_model.black, which = 1)

leveneTest(diapuse_time ~ Location * Sex, data = black)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value  Pr(>F)  
## group   3  3.0759 0.02725 *
##       548                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1