Modified from hybridViability.Rmd (Oct 2017)

Purpose

Identify reproductive barriers between two sympatric moth-pollinated plant species, Schiedea kaalae and S. hookeri by fitting a generalized linear mixed model (GLMM).

In this analysis the response variable is the pollen viability of each cross. Other barriers (hybrid survival, flowering) could be analyzed in a similar framework, with appropriate changes to the underlying distribution.

Data Import

setwd("~/MyDocs/Dropbox/UCI/Schiedea/Analysis/HybridSeeds")
pol <- read.table("f2pollen.csv", header=T, sep="\t", colClasses=c(date="Date"))
pol <- pol[!is.na(pol$V1) & pol$dadfullcross!="9999",]

pol$v <- rowSums(pol[,grepl("V", colnames(pol))])/10 # average viable counts
pol$i <- rowSums(pol[,grepl("I", colnames(pol))])/10 # average inviable counts
pol$vpf <- (pol$v * 444.4/20) * 10      # number viable per flower (5 flrs)
pol$ipf <- (pol$i * 444.4/20) * 10      # number inviable per flower (5 flrs)
pol$vp <- pol$vpf / (pol$vpf + pol$ipf) # proportion viable
pol$tpf <- pol$vpf + pol$ipf
pol$t <- 10*(pol$v + pol$i)

polm2 <- pol[grepl("x",pol$momfullcross),]
polm2.split <- cbind(polm2$crossid, as.data.frame(matrix(unlist(matrix(lapply(matrix(unlist(strsplit(as.character(polm2$momfullcross), " x ", fixed=T)), nrow=2), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=4, byrow=T)))
colnames(polm2.split) <- c("crossid","mommompop","mommomid","momdadpop","momdadid")

pold2 <- pol[grepl("x",pol$dadfullcross),]
pold2.split <- cbind(pold2$crossid, as.data.frame(matrix(unlist(matrix(lapply(matrix(unlist(strsplit(as.character(pold2$dadfullcross), " x ", fixed=T)), nrow=2), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=4, byrow=T)))
colnames(pold2.split) <- c("crossid","dadmompop","dadmomid","daddadpop","daddadid")

polm1 <- pol[!grepl("x",pol$momfullcross),]
polm1.split <- cbind(polm1$crossid, as.data.frame(matrix(unlist(matrix(lapply(as.character(polm1$momfullcross), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=1)), ncol=2, byrow=T)))
colnames(polm1.split) <- c("crossid","mompop","momid")

pold1 <- pol[!grepl("x",pol$dadfullcross),]
pold1.split <- cbind(pold1$crossid, as.data.frame(matrix(unlist(matrix(lapply(as.character(pold1$dadfullcross), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=1)), ncol=2, byrow=T)))
colnames(pold1.split) <- c("crossid","dadpop","dadid")


pol <- merge(pol, polm2.split, by="crossid", all.x=T)
pol <- merge(pol, pold2.split, by="crossid", all.x=T)
pol <- merge(pol, polm1.split, by="crossid", all.x=T)
pol <- merge(pol, pold1.split, by="crossid", all.x=T)

popcols <- c("mompop","dadpop","mommompop","momdadpop","dadmompop","daddadpop")
pol[popcols] <- lapply(pol[popcols], function(x) { sapply(x, mapvalues, from = c("794","866","899","879","892","904","3587"), to = c("WK","WK","WK","879WKG","892WKG","904WPG","3587WP"), warn_missing=F) })

kaalpops <- c("904WPG","3587WP", "892WKG")
pop2sp <- function(x) { factor(ifelse(x %in% kaalpops, "kaal", ifelse(is.na(x),NA,"hook")))}
pol$mommomspecies <- pop2sp(pol$mommompop)
pol$momdadspecies <- pop2sp(pol$momdadpop)
pol$dadmomspecies <- pop2sp(pol$dadmompop)
pol$daddadspecies <- pop2sp(pol$daddadpop)
pol$momcross <- factor(toupper(paste0(substr(pol$mommomspecies,0,1), substr(pol$momdadspecies,0,1))))
pol$dadcross <- factor(toupper(paste0(substr(pol$dadmomspecies,0,1), substr(pol$daddadspecies,0,1))))

pol$momcross <- factor(ifelse(pol$momcross=="NANA", toupper(substr(as.character(pop2sp(pol$mompop)),0,1)), as.character(pol$momcross)))
pol$dadcross <- factor(ifelse(pol$dadcross=="NANA", toupper(substr(as.character(pop2sp(pol$dadpop)),0,1)), as.character(pol$dadcross)))
# is.na(pol$dadcross) <- is.na(pol$momcross) <- pol$crosstype=="control"

pol$dadcross <- factor(pol$dadcross, c("H",  "HK", "KH", "K"))
pol$momcross  <- factor(pol$momcross, c("H",  "HK", "KH", "K"))
pol$crosstype <- with(pol, factor(paste(momcross, dadcross, sep="x")))
pol$species <- factor(ifelse(pol$mompop %in% kaalpops, "kaal", "hook"))
pol$dadsp <-   factor(ifelse(pol$dadpop %in% kaalpops, "kaal", "hook"))
pol <- within(pol, smompop <- as.factor(paste(species,mompop,sep="")))
pol <- within(pol, mompid <- as.factor(paste(mompop,momid,sep=".")))
pol <- within(pol, dadpid <- as.factor(paste(dadpop,dadid,sep=".")))

pol$mompop <- as.character(pol$mompop)
pol$mompop[is.na(pol$mompop)] <- as.character(pol$momcross[is.na(pol$mompop)])
pol$mompop <- as.factor(pol$mompop)
pol$dadpop <- as.character(pol$dadpop)
pol$dadpop[is.na(pol$dadpop)] <- as.character(pol$dadcross[is.na(pol$dadpop)])
pol$dadpop <- as.factor(pol$dadpop)


# check final structure
str(pol)

   'data.frame':    113 obs. of  55 variables:
    $ crossid      : Factor w/ 113 levels "101-1","110-9",..: 1 2 3 4 5 6 7 8 9 10 ...
    $ date         : Date, format: "18-07-23" "18-07-16" ...
    $ momfullcross : Factor w/ 49 levels "3587-12","3587-14",..: 28 29 30 31 31 32 33 4 33 34 ...
    $ dadfullcross : Factor w/ 66 levels "3587-12 4/3/13",..: 12 3 19 29 38 41 20 5 42 21 ...
    $ V1           : int  32 24 26 19 20 21 6 12 84 3 ...
    $ I1           : int  9 5 7 1 23 10 39 30 10 2 ...
    $ V2           : int  75 19 40 25 25 26 7 14 90 1 ...
    $ I2           : int  12 10 5 5 33 18 34 23 6 6 ...
    $ V3           : int  67 22 37 22 18 24 7 11 82 0 ...
    $ I3           : int  7 11 9 0 3 25 40 37 9 2 ...
    $ V4           : int  46 29 32 32 19 18 4 10 82 4 ...
    $ I4           : int  11 11 6 4 40 20 33 42 9 4 ...
    $ V5           : int  66 20 29 14 21 24 3 12 70 2 ...
    $ I5           : int  7 10 3 9 23 24 29 34 3 2 ...
    $ V6           : int  74 26 33 15 23 13 7 13 68 2 ...
    $ I6           : int  8 13 8 2 27 15 26 36 2 7 ...
    $ V7           : int  62 23 26 12 22 25 7 19 65 1 ...
    $ I7           : int  8 21 9 3 24 13 41 32 10 4 ...
    $ V8           : int  52 22 31 26 26 19 3 13 66 3 ...
    $ I8           : int  11 16 10 5 30 14 28 44 4 8 ...
    $ V9           : int  61 22 30 23 27 19 4 17 94 1 ...
    $ I9           : int  10 19 5 6 43 15 34 39 3 6 ...
    $ V10          : int  53 24 29 22 32 26 4 20 72 4 ...
    $ I10          : int  6 14 7 2 39 23 23 40 6 10 ...
    $ v            : num  58.8 23.1 31.3 21 23.3 21.5 5.2 14.1 77.3 2.1 ...
    $ i            : num  8.9 13 6.9 3.7 28.5 17.7 32.7 35.7 6.2 5.1 ...
    $ vpf          : num  13065 5133 6955 4666 5177 ...
    $ ipf          : num  1978 2889 1533 822 6333 ...
    $ vp           : num  0.869 0.64 0.819 0.85 0.45 ...
    $ tpf          : num  15043 8021 8488 5488 11510 ...
    $ t            : num  677 361 382 247 518 392 379 498 835 72 ...
    $ mommompop    : Factor w/ 4 levels "3587WP","879WKG",..: 2 NA NA 3 3 NA 3 NA 3 3 ...
    $ mommomid     : Factor w/ 12 levels "1","10","10-1",..: 12 NA NA 1 1 NA 2 NA 2 7 ...
    $ momdadpop    : Factor w/ 3 levels "879WKG","892WKG",..: 3 NA NA 1 1 NA 1 NA 1 1 ...
    $ momdadid     : Factor w/ 14 levels "1-ID-1","1-ID-9",..: 11 NA NA 5 5 NA 3 NA 3 3 ...
    $ dadmompop    : Factor w/ 4 levels "3587WP","879WKG",..: NA 1 2 NA NA 3 NA 1 3 NA ...
    $ dadmomid     : Factor w/ 12 levels "1","10","10-1",..: NA 4 3 NA NA 7 NA 4 7 NA ...
    $ daddadpop    : Factor w/ 3 levels "879WKG","892WKG",..: NA 1 3 NA NA 1 NA 1 1 NA ...
    $ daddadid     : Factor w/ 22 levels "1-ID-1","10-1-ID",..: NA 2 18 NA NA 2 NA 20 5 NA ...
    $ mompop       : Factor w/ 6 levels "3587WP","879WKG",..: 5 2 3 6 6 3 6 1 6 6 ...
    $ momid        : Factor w/ 21 levels "1","10","10-1",..: NA 21 1 NA NA 2 NA 18 NA NA ...
    $ dadpop       : Factor w/ 7 levels "3587WP","879WKG",..: 7 6 5 2 3 6 2 6 6 2 ...
    $ dadid        : Factor w/ 16 levels "1","10","10-1",..: 7 NA NA 16 4 NA 8 NA NA 9 ...
    $ mommomspecies: Factor w/ 2 levels "hook","kaal": 1 NA NA 2 2 NA 2 NA 2 2 ...
    $ momdadspecies: Factor w/ 2 levels "hook","kaal": 2 NA NA 1 1 NA 1 NA 1 1 ...
    $ dadmomspecies: Factor w/ 2 levels "hook","kaal": NA 2 1 NA NA 2 NA 2 2 NA ...
    $ daddadspecies: Factor w/ 2 levels "hook","kaal": NA 1 2 NA NA 1 NA 1 1 NA ...
    $ momcross     : Factor w/ 4 levels "H","HK","KH",..: 2 1 4 3 3 4 3 4 3 3 ...
    $ dadcross     : Factor w/ 4 levels "H","HK","KH",..: 1 3 2 1 4 3 1 3 3 1 ...
    $ crosstype    : Factor w/ 10 levels "HKxH","HKxHK",..: 1 5 9 6 7 10 6 10 8 6 ...
    $ species      : Factor w/ 2 levels "hook","kaal": 1 1 2 1 1 2 1 2 1 1 ...
    $ dadsp        : Factor w/ 2 levels "hook","kaal": 1 1 1 1 2 1 1 1 1 1 ...
    $ smompop      : Factor w/ 5 levels "hook879WKG","hookNA",..: 2 1 4 2 2 4 2 3 2 2 ...
    $ mompid       : Factor w/ 23 levels "3587WP.12","3587WP.14",..: 23 14 15 23 23 16 23 3 23 23 ...
    $ dadpid       : Factor w/ 18 levels "3587WP.12-4/3/13",..: 18 17 17 7 10 17 5 17 17 6 ...

load("hybridpollen.Rdata")
hybpol$momcross <- factor(substr(as.character(hybpol$cross),1,1))
hybpol$dadcross <- factor(substr(as.character(hybpol$cross),2,2))
pol$mompid <- pol$momfullcross
pol$dadpid <- pol$dadfullcross

merging <- intersect(colnames(hybpol), colnames(pol))#c("date", "v", "i", "vp", "tpf", "t", "momcross","dadcross","mompid","dadpid", "mompop","dadpop","crosstype")
bothpol <- rbind(pol[, merging], hybpol[, merging])

#with(bothpol, plot(vp~interaction(momcross,dadcross), las=2))
#with(bothpol, plot(t~interaction(momcross,dadcross), las=2))
table(bothpol$momcross, bothpol$dadcross)

       
         H HK KH  K
     H  30 13 20 61
     HK  9  6  0  6
     KH 16  0 13 13
     K  49  7 10 57

pol <- bothpol
pol$v <- pol$v*10
pol$i <- pol$i*10
pol$momdadcross <- with(pol, droplevels(interaction(momcross, dadcross)))

Viable percentage versus total pollen

ggplot(pol, aes(x=tpf,y=vp)) + geom_smooth() + geom_point(aes(color=interaction(momcross,dadcross, sep=" x "))) + scale_color_discrete("Cross") + labs(x="Pollen grains per flower", y="Proportion viable")+theme_minimal()

   `geom_smooth()` using method = 'loess' and formula 'y ~ x'

summary(lm(vp~tpf, data=pol))

   
   Call:
   lm(formula = vp ~ tpf, data = pol)
   
   Residuals:
       Min      1Q  Median      3Q     Max 
   -0.7437 -0.1887  0.0092  0.1957  0.4812 
   
   Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
   (Intercept) 3.219e-01  4.272e-02   7.534 5.51e-13 ***
   tpf         2.487e-05  3.888e-06   6.396 5.90e-10 ***
   ---
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
   
   Residual standard error: 0.2646 on 308 degrees of freedom
   Multiple R-squared:  0.1173, Adjusted R-squared:  0.1144 
   F-statistic: 40.91 on 1 and 308 DF,  p-value: 5.905e-10

#Viable percentage versus viable

ggplot(pol, aes(x=vp,y=vpf)) + geom_smooth() + geom_point(aes(color=interaction(momcross,dadcross, sep=" x "))) + scale_color_discrete("Cross") + labs(x="Proportion viable", y="Viable pollen grains per flower")+theme_minimal()

   `geom_smooth()` using method = 'loess' and formula 'y ~ x'

summary(lm(vpf~vp, data=pol))

   
   Call:
   lm(formula = vpf ~ vp, data = pol)
   
   Residuals:
       Min      1Q  Median      3Q     Max 
   -7653.7 -1192.8   -27.8   980.5  6474.7 
   
   Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
   (Intercept)   -970.4      276.0  -3.516 0.000504 ***
   vp           12607.9      429.7  29.340  < 2e-16 ***
   ---
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
   
   Residual standard error: 2124 on 308 degrees of freedom
   Multiple R-squared:  0.7365, Adjusted R-squared:  0.7356 
   F-statistic: 860.8 on 1 and 308 DF,  p-value: < 2.2e-16

Pollen viability

Distributions by random factors

ggplot(aes(y=vp, x=mompid, color=dadcross), data=pol) + geom_point() + coord_flip() + labs(x="Maternal plant", y="Proportion viable")+theme_minimal()

Distributions by fixed factors

(bothplot <- ggplot(pol, aes(y=vp, x=dadcross, fill=dadcross)) + 
    facet_wrap(vars(momcross), nrow=1)+ ggtitle("Maternal parent") +
   geom_boxplot() + geom_point(aes(color=mompop)) + scale_color_discrete("Maternal population",na.value="grey20") +
  labs(x="Paternal parent", y="Pollen viability") +
  scale_y_continuous(expand = expand_scale(mult=c(0,0)), breaks = scales::pretty_breaks(n = 5), limits=c(0,1)) +
  scale_fill_manual("Paternal plant", values=c("grey90","gray70","gray50","gray30"))+
  theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank(), plot.title=element_text(hjust=0.5, size=rel(1)))+guides(fill=F))

ggsave("pollenviability.png", bothplot, width=7, height=6)

Models

sc.bin          <- glm(vp~momcross*dadcross, data=pol, family="binomial", weights=pol$t)
sc.mix.mompop.bin         <- glmer(vp~momcross*dadcross + (1|mompop), data=pol, family="binomial", weights=pol$t)

   fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients

sc.mix.momdadpop.bin         <- glmer(vp~momcross*dadcross + (1|mompop) + (1|dadpop), data=pol, family="binomial", weights=pol$t)

   fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients

sc.mix.mompid.bin         <- glmer(vp~momcross*dadcross + (1|mompid), data=pol, family="binomial", weights=pol$t)

   fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients

sc.mix.momdadpid.bin         <- glmer(vp~momcross*dadcross + (1|mompid) + (1|dadpid), data=pol, family="binomial", weights=pol$t)

   fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients

#Observation-level random effect for overdispers
pol$line <- 1:nrow(pol)
sc.mix.momdadpop.olre.bin         <- glmer(vp~momcross*dadcross + (1|mompop) + (1|dadpop) +(1|line), data=pol, family="binomial", weights=pol$t)

   fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients

library(hglm)

   Loading required package: hglm.data

   
   hglm: Hierarchical Generalized Linear Models

   Version 2.2-0 (2018-08-24) installed

   Authors: Moudud Alam, Lars Ronnegard, Xia Shen

   Maintainer: Xia Shen <xia.shen@ki.se>

   Use citation("hglm") to know how to cite our work.

   Discussion: https://r-forge.r-project.org/forum/?group_id=558

   BugReports: https://r-forge.r-project.org/tracker/?group_id=558

   VideoTutorials: http://www.youtube.com/playlist?list=PLn1OmZECD-n15vnYzvJDy5GxjNpVV5Jr8

sc.mix.mompid.betabin.hglm <- hglm2(vp~momdadcross + (1|mompid) , data=pol, family=binomial(link = logit), rand.family=Beta(link = logit), weights=pol$t)

library(spaMM)

   spaMM (version 2.4.35) is loaded.
   Type 'help(spaMM)' for a short introduction,
   and news(package='spaMM') for news.

   
   Attaching package: 'spaMM'

   The following object is masked from 'package:hglm':
   
       Beta

   The following object is masked from 'package:bbmle':
   
       coef

   The following object is masked from 'package:stats4':
   
       coef

sc.mix.mompid.betabin.spamm <- HLfit(cbind(v,i)~momdadcross + (1|mompid) ,family=binomial(),rand.family=Beta(), HLmethod="HL(0,0)", data=pol)

library(glmmTMB)

   
   Attaching package: 'glmmTMB'

   The following object is masked from 'package:glmmADMB':
   
       VarCorr

sc.mix.mompid.betabin.tmb <- glmmTMB(cbind(v,i)~momdadcross + (1|mompid), data=pol, family="betabinomial")

library(glmmADMB)
sc.betabin.admb               <- glmmadmb(cbind(v,i)~momdadcross, data=pol, family="betabinomial")
sc.mix.mompop.betabin.admb    <- glmmadmb(cbind(v,i)~momdadcross + (1|mompop), data=pol, family="betabinomial")
sc.mix.momdadpop.betabin.admb <- glmmadmb(cbind(v,i)~momdadcross + (1|mompop) + (1|dadpop), data=pol, family="betabinomial")
sc.mix.mompid.betabin.admb   <- glmmadmb(cbind(v,i)~momdadcross + (1|mompid), data=pol, family="betabinomial")
sc.mix.momdadpid.betabin.admb   <- glmmadmb(cbind(v,i)~momdadcross + (1|mompid)+ (1|dadpid), data=pol, family="betabinomial")

sc.mix.momdadpop.bin.admb     <- glmmadmb(cbind(v,i)~momdadcross + (1|mompop) + (1|dadpop), data=pol, family="binomial")
sc.mix.mompid.bin.admb     <- glmmadmb(cbind(v,i)~momdadcross + (1|mompid), data=pol, family="binomial")
sc.bin.admb     <- glmmadmb(cbind(v,i)~momdadcross, data=pol, family="binomial")

Model comparison

AIC

We will use the Aikake Information Criterion to pick the model the best fits the data, penalized by the number of parameters. Differences of 2 units are significant.

# AICtab(sc.b, sc.mix.mompop.b, sc.mix.mompid.b)
sc.admb.names <- c("sc.betabin.admb", "sc.mix.mompop.betabin.admb","sc.mix.momdadpop.betabin.admb", "sc.mix.mompid.betabin.admb", "sc.mix.momdadpop.bin.admb","sc.mix.momdadpid.betabin.admb", "sc.mix.mompid.bin.admb", "sc.bin.admb")
sc.names <- c("sc.bin", "sc.mix.mompid.bin","sc.mix.momdadpid.bin", "sc.mix.mompop.bin","sc.mix.momdadpop.bin", "sc.mix.momdadpop.olre.bin", sc.admb.names)
sc.list <- sapply(sc.names, get, USE.NAMES=T)
sc.AIC <- ICtab(sc.list,mnames=sc.names,type="AIC", base=T, delta=F) #  for AICc, nobs=nobs(sc.list[[1]])
class(sc.AIC)<-"data.frame"
all.names <- c(sc.names)
all.list <- sapply(all.names, get, USE.NAMES=T)
all.AIC <- dfun(rbind(sc.AIC))
all.AIC <- all.AIC[order(all.AIC$dAIC),]
kable(all.AIC, format.arg=list(digits=3))

	dAIC	df
sc.mix.momdadpop.olre.bin	0.0	17
sc.mix.mompid.betabin.admb	16.9	16
sc.mix.mompop.betabin.admb	18.2	16
sc.mix.momdadpid.betabin.admb	18.5	17
sc.mix.momdadpop.betabin.admb	20.2	17
sc.betabin.admb	28.6	15
sc.mix.momdadpid.bin	13525.7	16
sc.mix.mompid.bin.admb	20220.7	15
sc.mix.mompid.bin	20220.8	15
sc.mix.momdadpop.bin.admb	23942.7	16
sc.mix.momdadpop.bin	23942.7	16
sc.mix.mompop.bin	25137.7	15
sc.bin	28890.5	14
sc.bin.admb	28890.5	14

od <- as.data.frame(t(sapply(sc.list[-c(1,7)], overdisp)))
od$logratio <- log(od$ratio)
od[order(od$logratio),]

                                       chisq        ratio rdf p  logratio
   sc.mix.momdadpop.olre.bin        11.80061   0.04027513 293 1 -3.212021
   sc.mix.momdadpid.bin          14797.57398  50.33188429 294 0  3.918639
   sc.mix.mompid.bin.admb        21960.21463  74.44140554 295 0  4.310012
   sc.mix.mompid.bin             21960.32647  74.44178465 295 0  4.310017
   sc.mix.momdadpid.betabin.admb 26101.38918  89.08323953 293 0  4.489571
   sc.mix.momdadpop.bin          26280.53875  89.38958759 294 0  4.493004
   sc.mix.momdadpop.bin.admb     26280.71604  89.39019060 294 0  4.493011
   sc.mix.mompid.betabin.admb    27044.92100  91.98952721 294 0  4.521675
   sc.mix.mompop.bin             27139.14074  91.99708727 295 0  4.521757
   sc.mix.mompop.betabin.admb    28597.20835  97.26941615 294 0  4.577485
   sc.mix.momdadpop.betabin.admb 28597.21571  97.60141881 293 0  4.580892
   sc.bin.admb                   30569.48433 103.27528491 296 0  4.637398

Overdispersion

Looking at the normal, fixed effects model, we see that the residuals are not normal:

shapiro.test(sc.bin$residuals)# raw residuals!

   
    Shapiro-Wilk normality test
   
   data:  sc.bin$residuals
   W = 0.92404, p-value = 1.875e-11

Coefficients

The coefficients estimated for each model agree qualitatively.

sc.log.names <- sc.names[-c(7:14)]
sc.log <- sapply(sc.log.names, get, USE.NAMES=T)
coefplot2(sc.log, legend.x="topright",legend=T,legend.args=list(cex=0.8, xpd=T, inset=c(-0.1,0)), col.pts=sample(gg_color_hue(length(sc.log.names))), spacing=0.05, lwd.2=2, lwd.1=4, intercept=T)

sc.admb <- sapply(sc.admb.names, get, USE.NAMES=T)
coefplot2(sc.admb, legend.x="topright",legend=T,legend.args=list(cex=0.8, xpd=T, inset=c(-0.1,0)), col.pts=sample(gg_color_hue(length(sc.admb.names))), spacing=0.05, lwd.2=2, lwd.1=4, intercept=T)

sc.log.names <- c("sc.mix.mompid.betabin.tmb", "sc.mix.mompid.betabin.admb","sc.mix.mompid.betabin.hglm", "sc.mix.mompid.betabin.spamm")
sc.log <- sapply(sc.log.names, get, USE.NAMES=T)
sc.log.coef <- list(
coef(summary(sc.log[[1]]))[[1]],
coef(summary(sc.log[[2]])),
summary(sc.log[[3]])$ FixCoefMat,
summary(sc.log[[4]])$beta_table)

   formula: cbind(v, i) ~ momdadcross + (1 | mompid)
   Estimation of lambda by Laplace REML approximation (p_bv).
   Estimation of fixed effects by h-likelihood approximation.
   Family: binomial ( link = logit ) 
    ------------ Fixed effects (beta) ------------
                    Estimate Cond. SE t-value
   (Intercept)        2.6188  0.27347   9.576
   momdadcrossHK.H   -2.3559  0.36620  -6.433
   momdadcrossKH.H   -2.6812  0.33233  -8.068
   momdadcrossK.H    -2.7942  0.34326  -8.140
   momdadcrossH.HK   -2.4209  0.36805  -6.578
   momdadcrossHK.HK  -2.9510  0.36845  -8.009
   momdadcrossK.HK   -2.0926  0.36161  -5.787
   momdadcrossH.KH   -1.9040  0.36786  -5.176
   momdadcrossKH.KH  -2.7020  0.33316  -8.110
   momdadcrossK.KH   -2.2229  0.35981  -6.178
   momdadcrossH.K    -2.2642  0.03512 -64.470
   momdadcrossHK.K   -3.3392  0.36809  -9.072
   momdadcrossKH.K   -2.7063  0.33282  -8.132
   momdadcrossK.K    -0.9012  0.34332  -2.625
    --------------- Random effects ---------------
   Family: Beta ( link = logit ) 
              --- Variance parameters ('lambda'):
   lambda = 4 var(u)/(1 - 4 var(u)) for u ~ Beta[1/(2*lambda),1/(2*lambda)]; 
      mompid  :  0.1293  
                --- Coefficients for log(lambda):
     Group        Term Estimate Cond.SE
    mompid (Intercept)   -2.046  0.1725
   # of obs: 310; # of groups: mompid, 70 
    ------------- Likelihood values  -------------
                           logLik
          h-likelihood: -11726.35
   p_v(h) (marginal L): -11847.67
     p_beta,v(h) (ReL): -11868.04

colnames(sc.log.coef[[4]]) <- colnames(sc.log.coef[[3]])[1:3]
library(data.table)
sc.log.coef.all <- rbindlist(setNames(lapply(lapply(sc.log.coef, as.data.frame), setDT, keep.rownames = TRUE), sc.log.names), idcol="modelName", fill=T)
setnames(sc.log.coef.all, 2:4, c("Variable","Coefficient","SE"))
# Specify the width of your confidence intervals
interval1 <- -qnorm((1-0.9)/2)  # 90% multiplier
interval2 <- -qnorm((1-0.95)/2)  # 95% multiplier

# Plot
ggplot(sc.log.coef.all, aes(colour = modelName))+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = Variable, ymin = Coefficient - SE*interval1,
                                ymax = Coefficient + SE*interval1),
                            lwd = 1, position = position_dodge(width = 1/2))+
  geom_pointrange(aes(x = Variable, y = Coefficient, ymin = Coefficient - SE*interval2,
                                 ymax = Coefficient + SE*interval2),
                             lwd = 1/2, position = position_dodge(width = 1/2),
                             shape = 21, fill = "WHITE")+
  coord_flip() + theme_bw()

Inference

Description

mod <- sc.mix.mompid.betabin.admb
print(mod)

   
   GLMM's in R powered by AD Model Builder:
   
     Family: betabinom 
     alpha = 4.9571 
     link = logit 
   
   Fixed effects:
     Log-likelihood: -1744.9 
     AIC: 3521.8 
     Formula: cbind(v, i) ~ momdadcross + (1 | mompid) 
        (Intercept)  momdadcrossHK.H  momdadcrossKH.H   momdadcrossK.H 
           1.880763        -1.644447        -1.997737        -2.190016 
    momdadcrossH.HK momdadcrossHK.HK  momdadcrossK.HK  momdadcrossH.KH 
          -1.814675        -2.400678        -1.233611        -1.562228 
   momdadcrossKH.KH  momdadcrossK.KH   momdadcrossH.K  momdadcrossHK.K 
          -2.346534        -2.013672        -1.937335        -2.761363 
    momdadcrossKH.K   momdadcrossK.K 
          -2.206468        -0.219586 
   
   Random effects:
   Structure: Diagonal matrix
   Group=mompid
               Variance StdDev
   (Intercept)  0.08407 0.2899
   
   Number of observations: total=310, mompid=70

Model summary

The model estimated the following parameters, with individual parameter significance determined by the Wald z-test, and fixed effect significance determined by analysis of deviance Wald test.

summary(mod)

   
   Call:
   glmmadmb(formula = cbind(v, i) ~ momdadcross + (1 | mompid), 
       data = pol, family = "betabinomial")
   
   AIC: 3521.8 
   
   Coefficients:
                    Estimate Std. Error z value Pr(>|z|)    
   (Intercept)         1.881      0.218    8.64  < 2e-16 ***
   momdadcrossHK.H    -1.644      0.366   -4.50  6.9e-06 ***
   momdadcrossKH.H    -1.998      0.311   -6.42  1.4e-10 ***
   momdadcrossK.H     -2.190      0.266   -8.22  < 2e-16 ***
   momdadcrossH.HK    -1.815      0.332   -5.47  4.5e-08 ***
   momdadcrossHK.HK   -2.401      0.423   -5.67  1.4e-08 ***
   momdadcrossK.HK    -1.234      0.404   -3.06   0.0022 ** 
   momdadcrossH.KH    -1.562      0.315   -4.96  6.9e-07 ***
   momdadcrossKH.KH   -2.347      0.336   -6.98  3.0e-12 ***
   momdadcrossK.KH    -2.014      0.355   -5.66  1.5e-08 ***
   momdadcrossH.K     -1.937      0.217   -8.94  < 2e-16 ***
   momdadcrossHK.K    -2.761      0.435   -6.35  2.2e-10 ***
   momdadcrossKH.K    -2.206      0.331   -6.67  2.5e-11 ***
   momdadcrossK.K     -0.220      0.263   -0.83   0.4045    
   ---
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
   
   Number of observations: total=310, mompid=70 
   Random effect variance(s):
   Group=mompid
               Variance StdDev
   (Intercept)  0.08407 0.2899
   
   Beta-binomial dispersion parameter: 4.9571 (std. err.: 0.42775)
   
   Log-likelihood: -1744.9

qqnorm(resid(mod), main="normal qq-plot, residuals")
qqline(resid(mod))

qqnorm(ranef(mod)[[1]][,1])
qqline(ranef(mod)[[1]][,1])

plot(fitted(mod), resid(mod)) #residuals vs fitted
abline(h=0)

pol$fitted <- fitted(mod)    #fitted vs observed
plot(pol$fitted, jitter(pol$vp,0.1))
abline(0,1)

Estimated marginal means

The estimated marginal e means procedure can generate predictor estimates of each type, and give their significance groupings with a post-hoc Tukey test.

# Count
rg <- ref_grid(mod)
# summary(rg)
#sxc.lsm <- emmeans(rg, ~ momcross*dadcross)
sxc.lsm <- emmeans(rg, ~ momdadcross)
plot(sxc.lsm, comparisons=T)

CLD(sxc.lsm, Letters=letters)

    momdadcross      emmean        SE  df  asymp.LCL  asymp.UCL .group
    HK.K        -0.88059990 0.4645445 Inf -1.7910904 0.02989056  a    
    HK.HK       -0.51991559 0.4554523 Inf -1.4125857 0.37275451  a    
    KH.KH       -0.46577137 0.3723186 Inf -1.1955025 0.26395973  a    
    KH.K        -0.32570489 0.3624595 Inf -1.0361125 0.38470270  a    
    K.H         -0.30925353 0.3364231 Inf -0.9686307 0.35012360  a    
    K.KH        -0.13290920 0.3861059 Inf -0.8896628 0.62384439  a    
    KH.H        -0.11697446 0.3555303 Inf -0.8138011 0.57985216  a    
    H.K         -0.05657235 0.3137066 Inf -0.6714260 0.55828133  a    
    H.HK         0.06608814 0.3786250 Inf -0.6760032 0.80817950  a    
    HK.H         0.23631529 0.4061025 Inf -0.5596309 1.03226149  ab   
    H.KH         0.31853514 0.3694976 Inf -0.4056668 1.04273710  a    
    K.HK         0.64715132 0.4387894 Inf -0.2128601 1.50716270  abc  
    K.K          1.66117672 0.3284526 Inf  1.0174215 2.30493200   bc  
    H.H          1.88076275 0.2176200 Inf  1.4542354 2.30729011    c  
   
   Results are given on the logit (not the response) scale. 
   Confidence level used: 0.95 
   Results are given on the log odds ratio (not the response) scale. 
   P value adjustment: tukey method for comparing a family of 14 estimates 
   significance level used: alpha = 0.05

cld.mod <- cld(sxc.lsm, Letters=letters) # tukey letterings
library(boot)

   
   Attaching package: 'boot'

   The following object is masked from 'package:lattice':
   
       melanoma

   The following object is masked from 'package:car':
   
       logit

cld.mod$response <- inv.logit(cld.mod$emmean)
cld.mod$uSE <- inv.logit(cld.mod$emmean+cld.mod$SE)
cld.mod$lSE <- inv.logit(cld.mod$emmean-cld.mod$SE)
options(digits=4)
cld.mod[c(15,16),] <- NA
cld.mod$momdadcross <- as.character(cld.mod$momdadcross)
cld.mod[c(15,16),1] <- c("HK.KH","KH.HK")
cld.mod$momdadcross <- as.factor(cld.mod$momdadcross)
cld.mod <- cbind(cld.mod, lapply(read.table(text = as.character(cld.mod$momdadcross), sep = ".", col.names=c("momcross","dadcross")), factor, levels=levels(pol$momcross)))
cld.mod[rev(order(cld.mod$momcross, cld.mod$dadcross)),]

      momdadcross   emmean     SE  df asymp.LCL asymp.UCL .group response
   14         K.K  1.66118 0.3285 Inf    1.0174   2.30493     bc   0.8404
   10        K.KH -0.13291 0.3861 Inf   -0.8897   0.62384    a     0.4668
   7         K.HK  0.64715 0.4388 Inf   -0.2129   1.50716    abc   0.6564
   4          K.H -0.30925 0.3364 Inf   -0.9686   0.35012    a     0.4233
   13        KH.K -0.32570 0.3625 Inf   -1.0361   0.38470    a     0.4193
   9        KH.KH -0.46577 0.3723 Inf   -1.1955   0.26396    a     0.3856
   16       KH.HK       NA     NA  NA        NA        NA   <NA>       NA
   3         KH.H -0.11697 0.3555 Inf   -0.8138   0.57985    a     0.4708
   12        HK.K -0.88060 0.4645 Inf   -1.7911   0.02989    a     0.2931
   15       HK.KH       NA     NA  NA        NA        NA   <NA>       NA
   6        HK.HK -0.51992 0.4555 Inf   -1.4126   0.37275    a     0.3729
   2         HK.H  0.23632 0.4061 Inf   -0.5596   1.03226    ab    0.5588
   11         H.K -0.05657 0.3137 Inf   -0.6714   0.55828    a     0.4859
   8         H.KH  0.31854 0.3695 Inf   -0.4057   1.04274    a     0.5790
   5         H.HK  0.06609 0.3786 Inf   -0.6760   0.80818    a     0.5165
   1          H.H  1.88076 0.2176 Inf    1.4542   2.30729      c   0.8677
         uSE    lSE momcross dadcross
   14 0.8797 0.7913        K        K
   10 0.5630 0.3731        K       KH
   7  0.7476 0.5519        K       HK
   4  0.5068 0.3440        K        H
   13 0.5092 0.3344       KH        K
   9  0.4767 0.3019       KH       KH
   16     NA     NA       KH       HK
   3  0.5594 0.3840       KH        H
   12 0.3975 0.2067       HK        K
   15     NA     NA       HK       KH
   6  0.4839 0.2738       HK       HK
   2  0.6553 0.4577       HK        H
   11 0.5639 0.4085        H        K
   8  0.6655 0.4873        H       KH
   5  0.6094 0.4225        H       HK
   1  0.8907 0.8407        H        H

ggplot(as.data.frame(cld.mod), aes(y=response, x=momcross, fill=dadcross)) +
  geom_col(position=position_dodge2()) +
  geom_linerange(aes(ymin=lSE, ymax=uSE), position=position_dodge2(0.9)) +
  labs(x="Maternal plant", y="Pollen viability") +
  geom_text(aes(label=trimws(.group), x=momcross, hjust=length(trimws(.group))/30), position=position_dodge2(0.9), vjust=-1) + 
  geom_text(aes(label=dadcross, y=0.05), position=position_dodge2(0.9)) +
  scale_y_continuous(expand = expand_scale(mult=c(0,0)), breaks = scales::pretty_breaks(n = 5), limits=c(0,1)) +
  scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
  theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + guides(fill=F)

Contrasts

Expect difference:

HK x K - K x HK
KH x H - H x KH

No difference:

HK x H - H x HK
KH x K - K x HK

K = matrix(rep(0,14*4), 4)
colnames(K) <- substr(names(coef(mod)),12,16)
K[1,c("HK.K","K.HK")] <- K[2,c("KH.H","H.KH")] <- K[3,c("HK.H","H.HK")] <- K[4,c("KH.K","K.KH")] <- c(1, -1)
rownames(K) <- c("HK x K - K x HK","KH x H - H x KH","HK x H - H x HK","KH x K - K x HK")
library(multcomp)
summary(glht(mod, linfct=K),test=adjusted("none"))

   
     Simultaneous Tests for General Linear Hypotheses
   
   Fit: glmmadmb(formula = cbind(v, i) ~ momdadcross + (1 | mompid), 
       data = pol, family = "betabinomial")
   
   Linear Hypotheses:
                        Estimate Std. Error z value Pr(>|z|)   
   HK x K - K x HK == 0  -1.5278     0.5859  -2.608  0.00911 **
   KH x H - H x KH == 0  -0.4355     0.4394  -0.991  0.32165   
   HK x H - H x HK == 0   0.1702     0.4896   0.348  0.72808   
   KH x K - K x HK == 0  -0.1928     0.4683  -0.412  0.68058   
   ---
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
   (Adjusted p values reported -- none method)

#diffdiff <- matrix(K[1,]+K[2,]-K[3,]-K[4,],1)
#summary(glht(sc.mix.mompid.betabin.admb, linfct=diffdiff))

custom.emmc <- function(levels) as.data.frame(t(K))
contrast(sxc.lsm, "custom", adjust="none")

    contrast          estimate        SE  df z.ratio p.value
    HK x K - K x HK -1.5277512 0.5858552 Inf  -2.608  0.0091
    KH x H - H x KH -0.4355096 0.4394330 Inf  -0.991  0.3217
    HK x H - H x HK  0.1702271 0.4896130 Inf   0.348  0.7281
    KH x K - K x HK -0.1927957 0.4683255 Inf  -0.412  0.6806
   
   Results are given on the log odds ratio (not the response) scale.

contrast(sxc.lsm, "custom", adjust="none", type="response")

    contrast        odds.ratio        SE  df z.ratio p.value
    HK x K / K x HK  0.2170232 0.1271441 Inf  -2.608  0.0091
    KH x H / H x KH  0.6469349 0.2842846 Inf  -0.991  0.3217
    HK x H / H x HK  1.1855741 0.5804726 Inf   0.348  0.7281
    KH x K / K x HK  0.8246504 0.3862049 Inf  -0.412  0.6806
   
   Tests are performed on the log odds ratio scale

(em_cont <- ggplot(as.data.frame(cld.mod[cld.mod$momdadcross %in% c("HK.K","K.HK","KH.H","H.KH","HK.H","H.HK","KH.K","K.KH"),]), aes(y=response, x=momcross, fill=dadcross)) +
  geom_col(position=position_dodge2()) +
  geom_linerange(aes(ymin=lSE, ymax=uSE), position=position_dodge2(0.9)) +
  labs(x="Maternal plant", y="Pollen viability") +
  geom_text(aes(label=dadcross, y=0.05), position=position_dodge2(0.9)) +
  scale_y_continuous(expand = expand_scale(mult=c(0,0)), breaks = scales::pretty_breaks(n = 5), limits=c(0,1)) +
  scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
  theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + guides(fill=F))

Pollen grains per flower

Distributions by fixed factors

(bothplot <- ggplot(pol, aes(y=tpf, x=dadcross, fill=dadcross)) + 
    facet_wrap(vars(momcross), nrow=1)+ ggtitle("Maternal parent") +
   geom_boxplot() + geom_point(aes(color=mompop)) + scale_color_discrete("Maternal population",na.value="grey20") +
  labs(x="Paternal parent", y="Pollen grains per flower") +
  scale_fill_manual("Paternal plant", values=c("grey90","gray70","gray50","gray30"))+
  theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank(), plot.title=element_text(hjust=0.5, size=rel(1)))+guides(fill=F))

Distributions by random factors

ggplot(aes(y=tpf, x=mompid, color=dadcross), data=pol) + geom_point() + coord_flip() + labs(x="Maternal plant", y="Pollen grains per flower")+theme_minimal()

Models

sc          <- lm(tpf~momdadcross, data=pol)
sc.mix.mompop         <- lmer(tpf~momdadcross + (1|mompop), data=pol)
sc.mix.momdadpop      <- lmer(tpf~momdadcross + (1|mompop) + (1|dadpop), data=pol)
sc.mix.mompid         <- lmer(tpf~momdadcross + (1|mompid), data=pol)
sc.mix.momdadpid      <- lmer(tpf~momdadcross + (1|mompid) + (1|dadpid), data=pol)

Model comparison

AIC

We will use the Aikake Information Criterion to pick the model the best fits the data, penalized by the number of parameters. Differences of 2 units are significant.

# AICtab(sc.b, sc.mix.mompop.b, sc.mix.mompid.b)
sc.names <- c("sc", "sc.mix.mompid","sc.mix.momdadpid", "sc.mix.mompop","sc.mix.momdadpop")
sc.list <- sapply(sc.names, get, USE.NAMES=T)
sc.AIC <- ICtab(sc.list,mnames=sc.names,type="AIC", base=T, delta=F) #  for AICc, nobs=nobs(sc.list[[1]])
class(sc.AIC)<-"data.frame"
all.names <- c(sc.names)
all.list <- sapply(all.names, get, USE.NAMES=T)
all.AIC <- dfun(rbind(sc.AIC))
all.AIC <- all.AIC[order(all.AIC$dAIC),]
kable(all.AIC, format.arg=list(digits=3))

	dAIC	df
sc.mix.momdadpid	0.00	17
sc.mix.mompid	2.69	16
sc.mix.mompop	6.89	16
sc.mix.momdadpop	7.08	17
sc	219.50	15

Overdispersion

Looking at the normal, fixed effects model, we see that the residuals are not normal:

shapiro.test(sc$residuals)# raw residuals!

   
    Shapiro-Wilk normality test
   
   data:  sc$residuals
   W = 0.99, p-value = 0.006

Coefficients

The coefficients estimated for each model agree qualitatively.

sc.log.names <- sc.names
sc.log <- sapply(sc.log.names, get, USE.NAMES=T)

coefplot2(sc.log, legend.x="topright",legend=T,legend.args=list(cex=0.8, xpd=T, inset=c(-0.1,0)), col.pts=sample(gg_color_hue(length(sc.log.names))), spacing=0.05, lwd.2=2, lwd.1=4, intercept=T)

Inference

Description

mod <- sc.mix.momdadpid
print(mod)

   Linear mixed model fit by REML ['lmerMod']
   Formula: tpf ~ momdadcross + (1 | mompid) + (1 | dadpid)
      Data: pol
   REML criterion at convergence: 5693.565
   Random effects:
    Groups   Name        Std.Dev.
    dadpid   (Intercept) 1119    
    mompid   (Intercept) 1118    
    Residual             3161    
   Number of obs: 310, groups:  dadpid, 84; mompid, 70
   Fixed Effects:
        (Intercept)   momdadcrossHK.H   momdadcrossKH.H    momdadcrossK.H  
            10411.6            -741.1           -3462.0           -1008.7  
    momdadcrossH.HK  momdadcrossHK.HK   momdadcrossK.HK   momdadcrossH.KH  
             -224.6           -2775.8            -448.1            -325.7  
   momdadcrossKH.KH   momdadcrossK.KH    momdadcrossH.K   momdadcrossHK.K  
            -4506.1            1731.6            1627.8           -1168.2  
    momdadcrossKH.K    momdadcrossK.K  
            -3071.8            1242.4

Model summary

The model estimated the following parameters, with individual parameter significance determined by the Wald z-test, and fixed effect significance determined by analysis of deviance Wald test.

summary(mod)

   Linear mixed model fit by REML ['lmerMod']
   Formula: tpf ~ momcross * dadcross + (1 | mompid) + (1 | dadpid)
      Data: pol
   
   REML criterion at convergence: 5694
   
   Scaled residuals: 
      Min     1Q Median     3Q    Max 
   -2.856 -0.556 -0.061  0.570  3.499 
   
   Random effects:
    Groups   Name        Variance Std.Dev.
    dadpid   (Intercept) 1251575  1119    
    mompid   (Intercept) 1250369  1118    
    Residual             9991798  3161    
   Number of obs: 310, groups:  dadpid, 84; mompid, 70
   
   Fixed effects:
                         Estimate Std. Error t value
   (Intercept)              10412        881   11.82
   momcrossHK                -741       1542   -0.48
   momcrossKH               -3462       1364   -2.54
   momcrossK                -1009       1026   -0.98
   dadcrossHK                -225       1360   -0.17
   dadcrossKH                -326       1272   -0.26
   dadcrossK                 1628        961    1.69
   momcrossHK:dadcrossHK    -1810       2296   -0.79
   momcrossK:dadcrossHK       785       1920    0.41
   momcrossKH:dadcrossKH     -718       1867   -0.38
   momcrossK:dadcrossKH      3066       1753    1.75
   momcrossHK:dadcrossK     -2055       2096   -0.98
   momcrossKH:dadcrossK     -1238       1708   -0.72
   momcrossK:dadcrossK        623       1086    0.57

   
   Correlation matrix not shown by default, as p = 14 > 12.
   Use print(x, correlation=TRUE)  or
       vcov(x)        if you need it

   fit warnings:
   fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients

Least square means

The least square means procedure can generate predictor estimates of each type, and give their significance groupings with a post-hoc Tukey test.

# Count
rg <- ref_grid(mod)
# summary(rg)
sxc.lsm <- emmeans(rg, ~ momdadcross)
plot(sxc.lsm)

cld.mod <- cld(sxc.lsm, Letters=letters) # tukey letterings
cld.mod[c(15,16),] <- NA
cld.mod$momdadcross <- as.character(cld.mod$momdadcross)
cld.mod[c(15,16),1] <- c("HK.KH","KH.HK")
cld.mod$momdadcross <- as.factor(cld.mod$momdadcross)
cld.mod <- cbind(cld.mod, lapply(read.table(text = as.character(cld.mod$momdadcross), sep = ".", col.names=c("momcross","dadcross")), factor, levels=levels(pol$momcross)))
options(digits=4)
cld.mod[rev(order(cld.mod$momcross, cld.mod$dadcross)),]

      momdadcross emmean     SE     df lower.CL upper.CL .group momcross
   14         K.K  11654  680.0  40.18    10280    13028      c        K
   10        K.KH  12143 1137.4 219.03     9902    14385     bc        K
   7         K.HK   9963 1334.0 250.74     7336    12591    abc        K
   4          K.H   9403  806.4  33.93     7764    11042    abc        K
   13        KH.K   7340 1039.2 148.02     5286     9393    ab        KH
   9        KH.KH   5905  979.0 247.88     3977     7834    a         KH
   16       KH.HK     NA     NA     NA       NA       NA   <NA>       KH
   3         KH.H   6950 1052.4  57.22     4842     9057    ab        KH
   12        HK.K   9243 1464.7 216.68     6357    12130    abc       HK
   15       HK.KH     NA     NA     NA       NA       NA   <NA>       HK
   6        HK.HK   7636 1441.1 247.99     4797    10474    abc       HK
   2         HK.H   9671 1275.9 135.42     7147    12194    abc       HK
   11         H.K  12039  738.0  31.34    10535    13544      c        H
   8         H.KH  10086  930.2  76.71     8234    11938    abc        H
   5         H.HK  10187 1041.1 171.14     8132    12242    abc        H
   1          H.H  10412  890.2  47.61     8621    12202    abc        H
      dadcross
   14        K
   10       KH
   7        HK
   4         H
   13        K
   9        KH
   16       HK
   3         H
   12        K
   15       KH
   6        HK
   2         H
   11        K
   8        KH
   5        HK
   1         H

ggplot(as.data.frame(cld.mod), aes(y=emmean, x=momcross, fill=dadcross)) +
  geom_col(position=position_dodge2()) +
  geom_linerange(aes(ymin=emmean-SE, ymax=emmean+SE), position=position_dodge2(0.9)) +
  labs(x="Maternal plant", y="Pollen grains per flower") +
  geom_text(aes(label=trimws(.group), x=momcross, hjust=length(trimws(.group))/30), position=position_dodge2(0.9), vjust=-3) + 
  geom_text(aes(label=dadcross, y=300), position=position_dodge2(0.9)) +
  scale_y_continuous(expand = expand_scale(mult=c(0,0.05))) +
  scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
  theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + guides(fill=F)

Contrasts

Expect difference:

HK x K - K x HK
KH x H - H x KH

No difference:

HK x H - H x HK
KH x K - K x HK

K = matrix(rep(0,14*4), 4)
colnames(K) <- substr(names(coef(summary(mod))[,1]),12,16)
K[1,c("HK.K","K.HK")] <- K[2,c("KH.H","H.KH")] <- K[3,c("HK.H","H.HK")] <- K[4,c("KH.K","K.KH")] <- c(1, -1)
rownames(K) <- c("HK x K - K x HK","KH x H - H x KH","HK x H - H x HK","KH x K - K x HK")
library(multcomp)
summary(glht(mod, linfct=K),test=adjusted("none"))

   
     Simultaneous Tests for General Linear Hypotheses
   
   Fit: lmer(formula = tpf ~ momdadcross + (1 | mompid) + (1 | dadpid), 
       data = pol)
   
   Linear Hypotheses:
                        Estimate Std. Error z value Pr(>|z|)   
   HK x K - K x HK == 0     -720       1976   -0.36   0.7156   
   KH x H - H x KH == 0    -3136       1388   -2.26   0.0239 * 
   HK x H - H x HK == 0     -516       1635   -0.32   0.7522   
   KH x K - K x HK == 0    -4803       1535   -3.13   0.0018 **
   ---
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
   (Adjusted p values reported -- none method)

#diffdiff <- matrix(K[1,]+K[2,]-K[3,]-K[4,],1)
#summary(glht(sc.mix.mompid.betabin.admb, linfct=diffdiff))

custom.emmc <- function(levels) as.data.frame(t(K))
contrast(sxc.lsm, "custom", adjust="none")

    contrast        estimate   SE     df t.ratio p.value
    HK x K - K x HK   -720.1 1981 233.55  -0.363  0.7166
    KH x H - H x KH  -3136.3 1405  88.31  -2.233  0.0281
    HK x H - H x HK   -516.4 1647 158.30  -0.314  0.7543
    KH x K - K x HK  -4803.4 1541 195.72  -3.117  0.0021

contrast(sxc.lsm, "custom", adjust="none", type="response")

    contrast        estimate   SE     df t.ratio p.value
    HK x K - K x HK   -720.1 1981 233.55  -0.363  0.7166
    KH x H - H x KH  -3136.3 1405  88.31  -2.233  0.0281
    HK x H - H x HK   -516.4 1647 158.30  -0.314  0.7543
    KH x K - K x HK  -4803.4 1541 195.72  -3.117  0.0021

ggplot(as.data.frame(cld.mod[cld.mod$momdadcross %in% c("HK.K","K.HK","KH.H","H.KH","HK.H","H.HK","KH.K","K.KH"),]), aes(y=emmean, x=momcross, fill=dadcross)) +
  geom_col(position=position_dodge2()) +
  geom_linerange(aes(ymin=emmean-SE, ymax=emmean+SE), position=position_dodge2(0.9)) +
  labs(x="Maternal plant", y="Pollen grains per flower") +
  geom_text(aes(label=dadcross, y=300), position=position_dodge2(0.9)) +
  scale_y_continuous(expand = expand_scale(mult=c(0,0.05))) +
  scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
  theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + guides(fill=F)

Viable pollen grains per flower

Distributions by fixed factors

(bothplot <- ggplot(pol, aes(y=vpf, x=dadcross, fill=dadcross)) + 
    facet_wrap(vars(momcross), nrow=1)+ ggtitle("Maternal parent") +
   geom_boxplot() + geom_point(aes(color=mompop)) + scale_color_discrete("Maternal population",na.value="grey20") +
  labs(x="Paternal parent", y="Viable pollen grains per flower") +
  scale_fill_manual("Paternal plant", values=c("grey90","gray70","gray50","gray30"))+
  theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank(), plot.title=element_text(hjust=0.5, size=rel(1)))+guides(fill=F))

Distributions by random factors

ggplot(aes(y=vpf, x=mompid, color=dadcross), data=pol) + geom_point() + coord_flip() + labs(x="Maternal plant", y="Viable pollen grains per flower")+theme_minimal()

Models

sc          <- lm(vpf~momdadcross, data=pol)
sc.mix.mompop         <- lmer(vpf~momdadcross + (1|mompop), data=pol)
sc.mix.momdadpop      <- lmer(vpf~momdadcross + (1|mompop) + (1|dadpop), data=pol)
sc.mix.mompid         <- lmer(vpf~momdadcross + (1|mompid), data=pol)
sc.mix.momdadpid      <- lmer(vpf~momdadcross + (1|mompid) + (1|dadpid), data=pol)

Model comparison

AIC

We will use the Aikake Information Criterion to pick the model the best fits the data, penalized by the number of parameters. Differences of 2 units are significant.

# AICtab(sc.b, sc.mix.mompop.b, sc.mix.mompid.b)
sc.names <- c("sc", "sc.mix.mompid","sc.mix.momdadpid", "sc.mix.mompop","sc.mix.momdadpop")
sc.list <- sapply(sc.names, get, USE.NAMES=T)
sc.AIC <- ICtab(sc.list,mnames=sc.names,type="AIC", base=T, delta=F) #  for AICc, nobs=nobs(sc.list[[1]])
class(sc.AIC)<-"data.frame"
all.names <- c(sc.names)
all.list <- sapply(all.names, get, USE.NAMES=T)
all.AIC <- dfun(rbind(sc.AIC))
all.AIC <- all.AIC[order(all.AIC$dAIC),]
kable(all.AIC, format.arg=list(digits=3))

	dAIC	df
sc.mix.momdadpop	0.00	17
sc.mix.mompop	2.08	16
sc.mix.momdadpid	6.92	17
sc.mix.mompid	7.63	16
sc	224.43	15

Overdispersion

Looking at the normal, fixed effects model, we see that the residuals are not normal:

shapiro.test(sc$residuals)# raw residuals!

   
    Shapiro-Wilk normality test
   
   data:  sc$residuals
   W = 0.97756, p-value = 8.94e-05

Coefficients

The coefficients estimated for each model agree qualitatively.

sc.log.names <- sc.names
sc.log <- sapply(sc.log.names, get, USE.NAMES=T)

coefplot2(sc.log, legend.x="topright",legend=T,legend.args=list(cex=0.8, xpd=T, inset=c(-0.1,0)), col.pts=sample(gg_color_hue(length(sc.log.names))), spacing=0.05, lwd.2=2, lwd.1=4, intercept=T)

Inference

Description

mod_vpf  <- sc
print(mod_vpf)

   
   Call:
   lm(formula = vpf ~ momdadcross, data = pol)
   
   Coefficients:
        (Intercept)   momdadcrossHK.H   momdadcrossKH.H    momdadcrossK.H  
               9570             -3581             -6166             -5605  
    momdadcrossH.HK  momdadcrossHK.HK   momdadcrossK.HK   momdadcrossH.KH  
              -4244             -6233             -2634             -3305  
   momdadcrossKH.KH   momdadcrossK.KH    momdadcrossH.K   momdadcrossHK.K  
              -6111             -3193             -3570             -6696  
    momdadcrossKH.K    momdadcrossK.K  
              -5835               317

Model summary

The model estimated the following parameters, with individual parameter significance determined by the Wald z-test, and fixed effect significance determined by analysis of deviance Wald test.

summary(mod_vpf)

   
   Call:
   lm(formula = vpf ~ momdadcross, data = pol)
   
   Residuals:
      Min     1Q Median     3Q    Max 
    -7976  -2450   -361   2074  13717 
   
   Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
   (Intercept)          9570        626   15.28  < 2e-16 ***
   momdadcrossHK.H     -3581       1303   -2.75  0.00638 ** 
   momdadcrossKH.H     -6166       1062   -5.81  1.6e-08 ***
   momdadcrossK.H      -5605        795   -7.05  1.3e-11 ***
   momdadcrossH.HK     -4244       1139   -3.73  0.00023 ***
   momdadcrossHK.HK    -6234       1534   -4.06  6.2e-05 ***
   momdadcrossK.HK     -2634       1440   -1.83  0.06826 .  
   momdadcrossH.KH     -3305        990   -3.34  0.00095 ***
   momdadcrossKH.KH    -6111       1139   -5.37  1.6e-07 ***
   momdadcrossK.KH     -3193       1252   -2.55  0.01128 *  
   momdadcrossH.K      -3570        765   -4.67  4.6e-06 ***
   momdadcrossHK.K     -6696       1534   -4.37  1.8e-05 ***
   momdadcrossKH.K     -5836       1139   -5.12  5.4e-07 ***
   momdadcrossK.K        317        774    0.41  0.68229    
   ---
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
   
   Residual standard error: 3430 on 296 degrees of freedom
   Multiple R-squared:  0.34,   Adjusted R-squared:  0.311 
   F-statistic: 11.7 on 13 and 296 DF,  p-value: <2e-16

Least square means

The least square means procedure can generate predictor estimates of each type, and give their significance groupings with a post-hoc Tukey test.

# Count
rg <- ref_grid(mod_vpf)
# summary(rg)
sxc.lsm <- emmeans(rg, ~ momdadcross)
plot(sxc.lsm)

cld.mod <- cld(sxc.lsm, Letters=letters) # tukey letterings
cld.mod[c(15,16),] <- NA
cld.mod$momdadcross <- as.character(cld.mod$momdadcross)
cld.mod[c(15,16),1] <- c("HK.KH","KH.HK")
cld.mod$momdadcross <- as.factor(cld.mod$momdadcross)
cld.mod <- cbind(cld.mod, lapply(read.table(text = as.character(cld.mod$momdadcross), sep = ".", col.names=c("momcross","dadcross")), factor, levels=levels(pol$momcross)))
options(digits=4)
cld.mod[rev(order(cld.mod$momcross, cld.mod$dadcross)),]

      momdadcross emmean     SE  df lower.CL upper.CL .group momcross
   14         K.K   9887  454.3 296   8993.1    10781      c        K
   10        K.KH   6377 1084.5 296   4242.8     8511    abc        K
   7         K.HK   6936 1296.2 296   4384.8     9487    abc        K
   4          K.H   3965  489.9 296   3000.9     4929    a          K
   13        KH.K   3735  951.2 296   1862.7     5607    a         KH
   9        KH.KH   3459  951.2 296   1587.6     5331    a         KH
   16       KH.HK     NA     NA  NA       NA       NA   <NA>       KH
   3         KH.H   3404  857.4 296   1716.5     5091    a         KH
   12        HK.K   2874 1400.1 296    118.4     5629    a         HK
   15       HK.KH     NA     NA  NA       NA       NA   <NA>       HK
   6        HK.HK   3337 1400.1 296    581.3     6092    a         HK
   2         HK.H   5990 1143.2 296   3739.7     8239    abc       HK
   11         H.K   6000  439.1 296   5136.0     6864    a          H
   8         H.KH   6265  766.9 296   4755.7     7774    ab         H
   5         H.HK   5326  951.2 296   3454.0     7198    a          H
   1          H.H   9570  626.1 296   8337.9    10802     bc        H
      dadcross
   14        K
   10       KH
   7        HK
   4         H
   13        K
   9        KH
   16       HK
   3         H
   12        K
   15       KH
   6        HK
   2         H
   11        K
   8        KH
   5        HK
   1         H

ggplot(as.data.frame(cld.mod), aes(y=emmean, x=momcross, fill=dadcross)) +
  geom_col(position=position_dodge2()) +
  geom_linerange(aes(ymin=emmean-SE, ymax=emmean+SE), position=position_dodge2(0.9)) +
  labs(x="Maternal plant", y="Viable pollen grains per flower") +
  geom_text(aes(label=trimws(.group), x=momcross, hjust=length(trimws(.group))/30), position=position_dodge2(0.9), vjust=-3) + 
  geom_text(aes(label=dadcross, y=300), position=position_dodge2(0.9)) +
  scale_y_continuous(expand = expand_scale(mult=c(0,0.05))) +
  scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
  theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + 
  guides(fill=F)

Contrasts

Expect difference:

HK x K - K x HK
KH x H - H x KH

No difference:

HK x H - H x HK
KH x K - K x HK

K = matrix(rep(0,14*4), 4)
colnames(K) <- substr(names(coef(mod_vpf)),12,16)
K[1,c("HK.K","K.HK")] <- K[2,c("KH.H","H.KH")] <- K[3,c("HK.H","H.HK")] <- K[4,c("KH.K","K.KH")] <- c(1, -1)
rownames(K) <- c("HK x K - K x HK","KH x H - H x KH","HK x H - H x HK","KH x K - K x HK")
library(multcomp)
summary(glht(mod_vpf, linfct=K),test=adjusted("none"))

   
     Simultaneous Tests for General Linear Hypotheses
   
   Fit: lm(formula = vpf ~ momdadcross, data = pol)
   
   Linear Hypotheses:
                        Estimate Std. Error t value Pr(>|t|)  
   HK x K - K x HK == 0    -4062       1908   -2.13    0.034 *
   KH x H - H x KH == 0    -2861       1150   -2.49    0.013 *
   HK x H - H x HK == 0      664       1487    0.45    0.656  
   KH x K - K x HK == 0    -2642       1442   -1.83    0.068 .
   ---
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
   (Adjusted p values reported -- none method)

#diffdiff <- matrix(K[1,]+K[2,]-K[3,]-K[4,],1)
#summary(glht(sc.mix.mompid.betabin.admb, linfct=diffdiff))

custom.emmc <- function(levels) as.data.frame(t(K))
contrast(sxc.lsm, "custom", adjust="none")

    contrast        estimate   SE  df t.ratio p.value
    HK x K - K x HK  -4062.0 1908 296  -2.129  0.0341
    KH x H - H x KH  -2861.1 1150 296  -2.487  0.0134
    HK x H - H x HK    663.6 1487 296   0.446  0.6558
    KH x K - K x HK  -2642.5 1443 296  -1.832  0.0680

ggplot(as.data.frame(cld.mod[cld.mod$momdadcross %in% c("HK.K","K.HK","KH.H","H.KH","HK.H","H.HK","KH.K","K.KH"),]), aes(y=emmean, x=momcross, fill=dadcross)) +
  geom_col(position=position_dodge2()) +
  geom_linerange(aes(ymin=emmean-SE, ymax=emmean+SE), position=position_dodge2(0.9)) +
  labs(x="Maternal plant", y="Viable pollen grains per flower") +
  geom_text(aes(label=dadcross, y=300), position=position_dodge2(0.9)) +
  scale_y_continuous(expand = expand_scale(mult=c(0,0))) +
  scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
  theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + 
  guides(fill=F)

F2 Pollen GLMM

John Powers

Sept 10, 2018

Purpose

Data Import

Viable percentage versus total pollen

Pollen viability

Distributions by random factors

Distributions by fixed factors

Models

Model comparison

AIC

Overdispersion

Coefficients

Inference

Description

Model summary

Estimated marginal means

Contrasts

Pollen grains per flower

Distributions by fixed factors

Distributions by random factors

Models

Model comparison

AIC

Overdispersion

Coefficients

Inference

Description

Model summary

Least square means

Contrasts

Viable pollen grains per flower

Distributions by fixed factors

Distributions by random factors

Models

Model comparison

AIC

Overdispersion

Coefficients

Inference

Description

Model summary

Least square means

Contrasts