Purpose

Identify reproductive barriers between two sympatric moth-pollinated plant species, Schiedea kaalae and S. hookeri by fitting a generalized linear mixed model (GLMM).

Fixed effects:

  • crosstype

Potential random effects:

  • mompid - maternal plant, specified by its population and ID

Data Import

sds <- read.table("f1seedmass.csv", header=T, sep="\t")
sds$smass <- sds$smass/10

sdsm2 <- sds[grepl("x",sds$momfullcross),]
sdsm2.split <- cbind(sdsm2$crossid, as.data.frame(matrix(unlist(matrix(lapply(matrix(unlist(strsplit(as.character(sdsm2$momfullcross), " x ", fixed=T)), nrow=2), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=4, byrow=T)))
colnames(sdsm2.split) <- c("crossid","mommompop","mommomid","momdadpop","momdadid")

sdsd2 <- sds[grepl("x",sds$dadfullcross),]
sdsd2.split <- cbind(sdsd2$crossid, as.data.frame(matrix(unlist(matrix(lapply(matrix(unlist(strsplit(as.character(sdsd2$dadfullcross), " x ", fixed=T)), nrow=2), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=4, byrow=T)))
colnames(sdsd2.split) <- c("crossid","dadmompop","dadmomid","daddadpop","daddadid")

sdsm1 <- sds[!grepl("x",sds$momfullcross),]
sdsm1.split <- cbind(sdsm1$crossid, as.data.frame(matrix(unlist(matrix(lapply(as.character(sdsm1$momfullcross), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=2, byrow=T)))
colnames(sdsm1.split) <- c("crossid","mompop","momid")

sdsd1 <- sds[!grepl("x",sds$dadfullcross),]
sdsd1.split <- cbind(sdsd1$crossid, as.data.frame(matrix(unlist(matrix(lapply(as.character(sdsd1$dadfullcross), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=2, byrow=T)))
colnames(sdsd1.split) <- c("crossid","dadpop","dadid")


sds <- merge(sds, sdsm2.split, by="crossid", all.x=T)
sds <- merge(sds, sdsd2.split, by="crossid", all.x=T)
sds <- merge(sds, sdsm1.split, by="crossid", all.x=T)
sds <- merge(sds, sdsd1.split, by="crossid", all.x=T)

pop2sp <- function(x) { factor(ifelse(x %in% c("904","3587", "892"), "kaal", ifelse(is.na(x),NA,"hook")))}
sds$mommomspecies <- pop2sp(sds$mommompop)
sds$momdadspecies <- pop2sp(sds$momdadpop)
sds$dadmomspecies <- pop2sp(sds$dadmompop)
sds$daddadspecies <- pop2sp(sds$daddadpop)
sds$momcross <- factor(toupper(paste0(substr(sds$mommomspecies,0,1), substr(sds$momdadspecies,0,1))))
sds$dadcross <- factor(toupper(paste0(substr(sds$dadmomspecies,0,1), substr(sds$daddadspecies,0,1))))

sds$momcross <- factor(ifelse(sds$momcross=="NANA", toupper(substr(as.character(pop2sp(sds$mompop)),0,1)), as.character(sds$momcross)))
sds$dadcross <- factor(ifelse(sds$dadcross=="NANA", toupper(substr(as.character(pop2sp(sds$dadpop)),0,1)), as.character(sds$dadcross)))
is.na(sds$dadcross) <- is.na(sds$momcross) <- sds$crosstype=="control"

table(sds$crossF)
   
   FF FH FK HF KF 
   60 46 36 79  9
with(sds, beanplot(smass~crossF, las=2))

table(sds$momcross, sds$dadcross)
       
         H HK  K KH
     H   0 23  0 56
     HK 18 15  7  0
     K   0  3  0  6
     KH 28  0 29 45
table(sds$momcross, sds$dadcross, sds$crossF) #errors in crosstype!
   , ,  = FF
   
       
         H HK  K KH
     H   0  0  0  0
     HK  0 15  0  0
     K   0  0  0  0
     KH  0  0  0 45
   
   , ,  = FH
   
       
         H HK  K KH
     H   0  0  0  0
     HK 18  0  0  0
     K   0  0  0  0
     KH 28  0  0  0
   
   , ,  = FK
   
       
         H HK  K KH
     H   0  0  0  0
     HK  0  0  7  0
     K   0  0  0  0
     KH  0  0 29  0
   
   , ,  = HF
   
       
         H HK  K KH
     H   0 23  0 56
     HK  0  0  0  0
     K   0  0  0  0
     KH  0  0  0  0
   
   , ,  = KF
   
       
         H HK  K KH
     H   0  0  0  0
     HK  0  0  0  0
     K   0  3  0  6
     KH  0  0  0  0
#levels(sds$dadcross) <- c("H",  "H", "HK", "K",  "KH", "K")
#levels(sds$momcross) <- c("H",  "H", "K", "K")
#table(sds$momcross, sds$dadcross)

#sds <- sds[!(sds$momcross=="KH" & sds$dadcross=="HK"),]

Models

f1.md        <- glm(smass~momcross*dadcross, data=sds, family="gaussian")

Model summaries

summary(f1.md)
   
   Call:
   glm(formula = smass ~ momcross * dadcross, family = "gaussian", 
       data = sds)
   
   Deviance Residuals: 
         Min         1Q     Median         3Q        Max  
   -0.109944  -0.014266   0.000486   0.014210   0.092222  
   
   Coefficients: (6 not defined because of singularities)
                          Estimate Std. Error t value Pr(>|t|)    
   (Intercept)            0.140008   0.007661  18.275  < 2e-16 ***
   momcrossHK             0.076937   0.010085   7.629 7.10e-13 ***
   momcrossK              0.299310   0.011954  25.039  < 2e-16 ***
   momcrossKH             0.072421   0.005571  12.999  < 2e-16 ***
   dadcrossHK            -0.023182   0.009611  -2.412 0.016680 *  
   dadcrossK             -0.025773   0.007373  -3.496 0.000572 ***
   dadcrossKH            -0.030651   0.006698  -4.576 7.92e-06 ***
   momcrossHK:dadcrossHK  0.011837   0.013675   0.866 0.387645    
   momcrossK:dadcrossHK  -0.077136   0.020849  -3.700 0.000273 ***
   momcrossKH:dadcrossHK        NA         NA      NA       NA    
   momcrossHK:dadcrossK   0.004258   0.014422   0.295 0.768119    
   momcrossK:dadcrossK          NA         NA      NA       NA    
   momcrossKH:dadcrossK         NA         NA      NA       NA    
   momcrossHK:dadcrossKH        NA         NA      NA       NA    
   momcrossK:dadcrossKH         NA         NA      NA       NA    
   momcrossKH:dadcrossKH        NA         NA      NA       NA    
   ---
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
   
   (Dispersion parameter for gaussian family taken to be 0.0007743861)
   
       Null deviance: 0.99164  on 229  degrees of freedom
   Residual deviance: 0.17036  on 220  degrees of freedom
   AIC: -983.1
   
   Number of Fisher Scoring iterations: 2
AIC(f1.md)
   [1] -983.1034

lsmeans and Tukey tests

#Count
rg.tr <- ref.grid(f1.md)
#summary(rg)
c.lsm <- lsmeans(rg.tr, ~ momcross*dadcross)
plot(c.lsm)

cld.mod <- cld(c.lsm, Letters=letters) #tukey letterings
cld.mod$response <- cld.mod$lsmean
cld.mod$uSE <- cld.mod$lsmean+cld.mod$SE
cld.mod$lSE <- cld.mod$lsmean-cld.mod$SE
cld.mod
    momcross dadcross    lsmean          SE df asymp.LCL asymp.UCL .group
    H        KH       0.1093571 0.003718645 NA 0.1020687 0.1166456  a    
    H        HK       0.1168261 0.005802496 NA 0.1054534 0.1281988  a    
    KH       KH       0.1817778 0.004148323 NA 0.1736472 0.1899083   b   
    KH       K        0.1866552 0.005167492 NA 0.1765271 0.1967833   b   
    HK       K        0.1954286 0.010517917 NA 0.1748138 0.2160433   bc  
    HK       HK       0.2056000 0.007185105 NA 0.1915175 0.2196825   bc  
    KH       H        0.2124286 0.005258959 NA 0.2021212 0.2227359    c  
    HK       H        0.2169444 0.006559074 NA 0.2040889 0.2298000    c  
    K        HK       0.3390000 0.016066384 NA 0.3075105 0.3704895     d 
    K        KH       0.4086667 0.011360649 NA 0.3864002 0.4309331      e
    H        H               NA          NA NA        NA        NA       
    K        H               NA          NA NA        NA        NA       
    KH       HK              NA          NA NA        NA        NA       
    H        K               NA          NA NA        NA        NA       
    K        K               NA          NA NA        NA        NA       
    HK       KH              NA          NA NA        NA        NA       
     response       uSE       lSE
    0.1093571 0.1130758 0.1056385
    0.1168261 0.1226286 0.1110236
    0.1817778 0.1859261 0.1776295
    0.1866552 0.1918227 0.1814877
    0.1954286 0.2059465 0.1849107
    0.2056000 0.2127851 0.1984149
    0.2124286 0.2176875 0.2071696
    0.2169444 0.2235035 0.2103854
    0.3390000 0.3550664 0.3229336
    0.4086667 0.4200273 0.3973060
           NA        NA        NA
           NA        NA        NA
           NA        NA        NA
           NA        NA        NA
           NA        NA        NA
           NA        NA        NA
   
   Confidence level used: 0.95 
   P value adjustment: tukey method for comparing a family of 16 estimates 
   significance level used: alpha = 0.05
ggplot(as.data.frame(cld.mod), aes(y=response, x=momcross, fill=dadcross)) +
  geom_col(position=position_dodge2()) +
  geom_linerange(aes(ymin=lSE, ymax=uSE), position=position_dodge2(0.9)) +
  labs(x="Maternal plant", y="Seed mass") +
  geom_text(aes(label=trimws(.group), x=momcross, hjust=length(trimws(.group))/30), position=position_dodge2(0.9), vjust=-4) + 
  geom_text(aes(label=dadcross, y=0.05), position=position_dodge2(0.9)) +
  scale_y_continuous(expand = expand_scale(mult=c(0,.1)), breaks = scales::pretty_breaks(n = 10))+
  scale_fill_manual(values=c("gray80","gray60","gray20","gray40"))+
  theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + guides(fill=F)