Modified from hybridViability.Rmd (Oct 2017)
Identify reproductive barriers between two sympatric moth-pollinated plant species, Schiedea kaalae and S. hookeri by fitting a generalized linear mixed model (GLMM).
In this analysis the response variable is the pollen viability of each cross. Other barriers (hybrid survival, flowering) could be analyzed in a similar framework, with appropriate changes to the underlying distribution.
setwd("~/MyDocs/Dropbox/UCI/Schiedea/Analysis/HybridSeeds")
pol <- read.table("f2pollen.csv", header=T, sep="\t", colClasses=c(date="Date"))
pol <- pol[!is.na(pol$V1) & pol$dadfullcross!="9999",]
pol$v <- rowSums(pol[,grepl("V", colnames(pol))])/10 # average viable counts
pol$i <- rowSums(pol[,grepl("I", colnames(pol))])/10 # average inviable counts
pol$vpf <- (pol$v * 444.4/20) * 10 # number viable per flower (5 flrs)
pol$ipf <- (pol$i * 444.4/20) * 10 # number inviable per flower (5 flrs)
pol$vp <- pol$vpf / (pol$vpf + pol$ipf) # proportion viable
pol$tpf <- pol$vpf + pol$ipf
pol$t <- 10*(pol$v + pol$i)
polm2 <- pol[grepl("x",pol$momfullcross),]
polm2.split <- cbind(polm2$crossid, as.data.frame(matrix(unlist(matrix(lapply(matrix(unlist(strsplit(as.character(polm2$momfullcross), " x ", fixed=T)), nrow=2), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=4, byrow=T)))
colnames(polm2.split) <- c("crossid","mommompop","mommomid","momdadpop","momdadid")
pold2 <- pol[grepl("x",pol$dadfullcross),]
pold2.split <- cbind(pold2$crossid, as.data.frame(matrix(unlist(matrix(lapply(matrix(unlist(strsplit(as.character(pold2$dadfullcross), " x ", fixed=T)), nrow=2), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=4, byrow=T)))
colnames(pold2.split) <- c("crossid","dadmompop","dadmomid","daddadpop","daddadid")
polm1 <- pol[!grepl("x",pol$momfullcross),]
polm1.split <- cbind(polm1$crossid, as.data.frame(matrix(unlist(matrix(lapply(as.character(polm1$momfullcross), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=1)), ncol=2, byrow=T)))
colnames(polm1.split) <- c("crossid","mompop","momid")
pold1 <- pol[!grepl("x",pol$dadfullcross),]
pold1.split <- cbind(pold1$crossid, as.data.frame(matrix(unlist(matrix(lapply(as.character(pold1$dadfullcross), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=1)), ncol=2, byrow=T)))
colnames(pold1.split) <- c("crossid","dadpop","dadid")
pol <- merge(pol, polm2.split, by="crossid", all.x=T)
pol <- merge(pol, pold2.split, by="crossid", all.x=T)
pol <- merge(pol, polm1.split, by="crossid", all.x=T)
pol <- merge(pol, pold1.split, by="crossid", all.x=T)
popcols <- c("mompop","dadpop","mommompop","momdadpop","dadmompop","daddadpop")
pol[popcols] <- lapply(pol[popcols], function(x) { sapply(x, mapvalues, from = c("794","866","899","879","892","904","3587"), to = c("WK","WK","WK","879WKG","892WKG","904WPG","3587WP"), warn_missing=F) })
kaalpops <- c("904WPG","3587WP", "892WKG")
pop2sp <- function(x) { factor(ifelse(x %in% kaalpops, "kaal", ifelse(is.na(x),NA,"hook")))}
pol$mommomspecies <- pop2sp(pol$mommompop)
pol$momdadspecies <- pop2sp(pol$momdadpop)
pol$dadmomspecies <- pop2sp(pol$dadmompop)
pol$daddadspecies <- pop2sp(pol$daddadpop)
pol$momcross <- factor(toupper(paste0(substr(pol$mommomspecies,0,1), substr(pol$momdadspecies,0,1))))
pol$dadcross <- factor(toupper(paste0(substr(pol$dadmomspecies,0,1), substr(pol$daddadspecies,0,1))))
pol$momcross <- factor(ifelse(pol$momcross=="NANA", toupper(substr(as.character(pop2sp(pol$mompop)),0,1)), as.character(pol$momcross)))
pol$dadcross <- factor(ifelse(pol$dadcross=="NANA", toupper(substr(as.character(pop2sp(pol$dadpop)),0,1)), as.character(pol$dadcross)))
# is.na(pol$dadcross) <- is.na(pol$momcross) <- pol$crosstype=="control"
pol$dadcross <- factor(pol$dadcross, c("H", "HK", "KH", "K"))
pol$momcross <- factor(pol$momcross, c("H", "HK", "KH", "K"))
pol$crosstype <- with(pol, factor(paste(momcross, dadcross, sep="x")))
pol$species <- factor(ifelse(pol$mompop %in% kaalpops, "kaal", "hook"))
pol$dadsp <- factor(ifelse(pol$dadpop %in% kaalpops, "kaal", "hook"))
pol <- within(pol, smompop <- as.factor(paste(species,mompop,sep="")))
pol <- within(pol, mompid <- as.factor(paste(mompop,momid,sep=".")))
pol <- within(pol, dadpid <- as.factor(paste(dadpop,dadid,sep=".")))
pol$mompop <- as.character(pol$mompop)
pol$mompop[is.na(pol$mompop)] <- as.character(pol$momcross[is.na(pol$mompop)])
pol$mompop <- as.factor(pol$mompop)
pol$dadpop <- as.character(pol$dadpop)
pol$dadpop[is.na(pol$dadpop)] <- as.character(pol$dadcross[is.na(pol$dadpop)])
pol$dadpop <- as.factor(pol$dadpop)
# check final structure
str(pol)
'data.frame': 113 obs. of 55 variables:
$ crossid : Factor w/ 113 levels "101-1","110-9",..: 1 2 3 4 5 6 7 8 9 10 ...
$ date : Date, format: "18-07-23" "18-07-16" ...
$ momfullcross : Factor w/ 49 levels "3587-12","3587-14",..: 28 29 30 31 31 32 33 4 33 34 ...
$ dadfullcross : Factor w/ 66 levels "3587-12 4/3/13",..: 12 3 19 29 38 41 20 5 42 21 ...
$ V1 : int 32 24 26 19 20 21 6 12 84 3 ...
$ I1 : int 9 5 7 1 23 10 39 30 10 2 ...
$ V2 : int 75 19 40 25 25 26 7 14 90 1 ...
$ I2 : int 12 10 5 5 33 18 34 23 6 6 ...
$ V3 : int 67 22 37 22 18 24 7 11 82 0 ...
$ I3 : int 7 11 9 0 3 25 40 37 9 2 ...
$ V4 : int 46 29 32 32 19 18 4 10 82 4 ...
$ I4 : int 11 11 6 4 40 20 33 42 9 4 ...
$ V5 : int 66 20 29 14 21 24 3 12 70 2 ...
$ I5 : int 7 10 3 9 23 24 29 34 3 2 ...
$ V6 : int 74 26 33 15 23 13 7 13 68 2 ...
$ I6 : int 8 13 8 2 27 15 26 36 2 7 ...
$ V7 : int 62 23 26 12 22 25 7 19 65 1 ...
$ I7 : int 8 21 9 3 24 13 41 32 10 4 ...
$ V8 : int 52 22 31 26 26 19 3 13 66 3 ...
$ I8 : int 11 16 10 5 30 14 28 44 4 8 ...
$ V9 : int 61 22 30 23 27 19 4 17 94 1 ...
$ I9 : int 10 19 5 6 43 15 34 39 3 6 ...
$ V10 : int 53 24 29 22 32 26 4 20 72 4 ...
$ I10 : int 6 14 7 2 39 23 23 40 6 10 ...
$ v : num 58.8 23.1 31.3 21 23.3 21.5 5.2 14.1 77.3 2.1 ...
$ i : num 8.9 13 6.9 3.7 28.5 17.7 32.7 35.7 6.2 5.1 ...
$ vpf : num 13065 5133 6955 4666 5177 ...
$ ipf : num 1978 2889 1533 822 6333 ...
$ vp : num 0.869 0.64 0.819 0.85 0.45 ...
$ tpf : num 15043 8021 8488 5488 11510 ...
$ t : num 677 361 382 247 518 392 379 498 835 72 ...
$ mommompop : Factor w/ 4 levels "3587WP","879WKG",..: 2 NA NA 3 3 NA 3 NA 3 3 ...
$ mommomid : Factor w/ 12 levels "1","10","10-1",..: 12 NA NA 1 1 NA 2 NA 2 7 ...
$ momdadpop : Factor w/ 3 levels "879WKG","892WKG",..: 3 NA NA 1 1 NA 1 NA 1 1 ...
$ momdadid : Factor w/ 14 levels "1-ID-1","1-ID-9",..: 11 NA NA 5 5 NA 3 NA 3 3 ...
$ dadmompop : Factor w/ 4 levels "3587WP","879WKG",..: NA 1 2 NA NA 3 NA 1 3 NA ...
$ dadmomid : Factor w/ 12 levels "1","10","10-1",..: NA 4 3 NA NA 7 NA 4 7 NA ...
$ daddadpop : Factor w/ 3 levels "879WKG","892WKG",..: NA 1 3 NA NA 1 NA 1 1 NA ...
$ daddadid : Factor w/ 22 levels "1-ID-1","10-1-ID",..: NA 2 18 NA NA 2 NA 20 5 NA ...
$ mompop : Factor w/ 6 levels "3587WP","879WKG",..: 5 2 3 6 6 3 6 1 6 6 ...
$ momid : Factor w/ 21 levels "1","10","10-1",..: NA 21 1 NA NA 2 NA 18 NA NA ...
$ dadpop : Factor w/ 7 levels "3587WP","879WKG",..: 7 6 5 2 3 6 2 6 6 2 ...
$ dadid : Factor w/ 16 levels "1","10","10-1",..: 7 NA NA 16 4 NA 8 NA NA 9 ...
$ mommomspecies: Factor w/ 2 levels "hook","kaal": 1 NA NA 2 2 NA 2 NA 2 2 ...
$ momdadspecies: Factor w/ 2 levels "hook","kaal": 2 NA NA 1 1 NA 1 NA 1 1 ...
$ dadmomspecies: Factor w/ 2 levels "hook","kaal": NA 2 1 NA NA 2 NA 2 2 NA ...
$ daddadspecies: Factor w/ 2 levels "hook","kaal": NA 1 2 NA NA 1 NA 1 1 NA ...
$ momcross : Factor w/ 4 levels "H","HK","KH",..: 2 1 4 3 3 4 3 4 3 3 ...
$ dadcross : Factor w/ 4 levels "H","HK","KH",..: 1 3 2 1 4 3 1 3 3 1 ...
$ crosstype : Factor w/ 10 levels "HKxH","HKxHK",..: 1 5 9 6 7 10 6 10 8 6 ...
$ species : Factor w/ 2 levels "hook","kaal": 1 1 2 1 1 2 1 2 1 1 ...
$ dadsp : Factor w/ 2 levels "hook","kaal": 1 1 1 1 2 1 1 1 1 1 ...
$ smompop : Factor w/ 5 levels "hook879WKG","hookNA",..: 2 1 4 2 2 4 2 3 2 2 ...
$ mompid : Factor w/ 23 levels "3587WP.12","3587WP.14",..: 23 14 15 23 23 16 23 3 23 23 ...
$ dadpid : Factor w/ 18 levels "3587WP.12-4/3/13",..: 18 17 17 7 10 17 5 17 17 6 ...
load("hybridpollen.Rdata")
hybpol$momcross <- factor(substr(as.character(hybpol$cross),1,1))
hybpol$dadcross <- factor(substr(as.character(hybpol$cross),2,2))
pol$mompid <- pol$momfullcross
pol$dadpid <- pol$dadfullcross
merging <- intersect(colnames(hybpol), colnames(pol))#c("date", "v", "i", "vp", "tpf", "t", "momcross","dadcross","mompid","dadpid", "mompop","dadpop","crosstype")
bothpol <- rbind(pol[, merging], hybpol[, merging])
#with(bothpol, plot(vp~interaction(momcross,dadcross), las=2))
#with(bothpol, plot(t~interaction(momcross,dadcross), las=2))
table(bothpol$momcross, bothpol$dadcross)
H HK KH K
H 30 13 20 61
HK 9 6 0 6
KH 16 0 13 13
K 49 7 10 57
pol <- bothpol
pol$v <- pol$v*10
pol$i <- pol$i*10
pol$momdadcross <- with(pol, droplevels(interaction(momcross, dadcross)))
ggplot(pol, aes(x=tpf,y=vp)) + geom_smooth() + geom_point(aes(color=interaction(momcross,dadcross, sep=" x "))) + scale_color_discrete("Cross") + labs(x="Pollen grains per flower", y="Proportion viable")+theme_minimal()
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
summary(lm(vp~tpf, data=pol))
Call:
lm(formula = vp ~ tpf, data = pol)
Residuals:
Min 1Q Median 3Q Max
-0.7437 -0.1887 0.0092 0.1957 0.4812
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.219e-01 4.272e-02 7.534 5.51e-13 ***
tpf 2.487e-05 3.888e-06 6.396 5.90e-10 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.2646 on 308 degrees of freedom
Multiple R-squared: 0.1173, Adjusted R-squared: 0.1144
F-statistic: 40.91 on 1 and 308 DF, p-value: 5.905e-10
#Viable percentage versus viable
ggplot(pol, aes(x=vp,y=vpf)) + geom_smooth() + geom_point(aes(color=interaction(momcross,dadcross, sep=" x "))) + scale_color_discrete("Cross") + labs(x="Proportion viable", y="Viable pollen grains per flower")+theme_minimal()
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
summary(lm(vpf~vp, data=pol))
Call:
lm(formula = vpf ~ vp, data = pol)
Residuals:
Min 1Q Median 3Q Max
-7653.7 -1192.8 -27.8 980.5 6474.7
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -970.4 276.0 -3.516 0.000504 ***
vp 12607.9 429.7 29.340 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2124 on 308 degrees of freedom
Multiple R-squared: 0.7365, Adjusted R-squared: 0.7356
F-statistic: 860.8 on 1 and 308 DF, p-value: < 2.2e-16
ggplot(aes(y=vp, x=mompid, color=dadcross), data=pol) + geom_point() + coord_flip() + labs(x="Maternal plant", y="Proportion viable")+theme_minimal()
(bothplot <- ggplot(pol, aes(y=vp, x=dadcross, fill=dadcross)) +
facet_wrap(vars(momcross), nrow=1)+ ggtitle("Maternal parent") +
geom_boxplot() + geom_point(aes(color=mompop)) + scale_color_discrete("Maternal population",na.value="grey20") +
labs(x="Paternal parent", y="Pollen viability") +
scale_y_continuous(expand = expand_scale(mult=c(0,0)), breaks = scales::pretty_breaks(n = 5), limits=c(0,1)) +
scale_fill_manual("Paternal plant", values=c("grey90","gray70","gray50","gray30"))+
theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank(), plot.title=element_text(hjust=0.5, size=rel(1)))+guides(fill=F))
ggsave("pollenviability.png", bothplot, width=7, height=6)
sc.bin <- glm(vp~momcross*dadcross, data=pol, family="binomial", weights=pol$t)
sc.mix.mompop.bin <- glmer(vp~momcross*dadcross + (1|mompop), data=pol, family="binomial", weights=pol$t)
fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
sc.mix.momdadpop.bin <- glmer(vp~momcross*dadcross + (1|mompop) + (1|dadpop), data=pol, family="binomial", weights=pol$t)
fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
sc.mix.mompid.bin <- glmer(vp~momcross*dadcross + (1|mompid), data=pol, family="binomial", weights=pol$t)
fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
sc.mix.momdadpid.bin <- glmer(vp~momcross*dadcross + (1|mompid) + (1|dadpid), data=pol, family="binomial", weights=pol$t)
fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
#Observation-level random effect for overdispers
pol$line <- 1:nrow(pol)
sc.mix.momdadpop.olre.bin <- glmer(vp~momcross*dadcross + (1|mompop) + (1|dadpop) +(1|line), data=pol, family="binomial", weights=pol$t)
fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
library(hglm)
Loading required package: hglm.data
hglm: Hierarchical Generalized Linear Models
Version 2.2-0 (2018-08-24) installed
Authors: Moudud Alam, Lars Ronnegard, Xia Shen
Maintainer: Xia Shen <xia.shen@ki.se>
Use citation("hglm") to know how to cite our work.
Discussion: https://r-forge.r-project.org/forum/?group_id=558
BugReports: https://r-forge.r-project.org/tracker/?group_id=558
VideoTutorials: http://www.youtube.com/playlist?list=PLn1OmZECD-n15vnYzvJDy5GxjNpVV5Jr8
sc.mix.mompid.betabin.hglm <- hglm2(vp~momdadcross + (1|mompid) , data=pol, family=binomial(link = logit), rand.family=Beta(link = logit), weights=pol$t)
library(spaMM)
spaMM (version 2.4.35) is loaded.
Type 'help(spaMM)' for a short introduction,
and news(package='spaMM') for news.
Attaching package: 'spaMM'
The following object is masked from 'package:hglm':
Beta
The following object is masked from 'package:bbmle':
coef
The following object is masked from 'package:stats4':
coef
sc.mix.mompid.betabin.spamm <- HLfit(cbind(v,i)~momdadcross + (1|mompid) ,family=binomial(),rand.family=Beta(), HLmethod="HL(0,0)", data=pol)
library(glmmTMB)
Attaching package: 'glmmTMB'
The following object is masked from 'package:glmmADMB':
VarCorr
sc.mix.mompid.betabin.tmb <- glmmTMB(cbind(v,i)~momdadcross + (1|mompid), data=pol, family="betabinomial")
library(glmmADMB)
sc.betabin.admb <- glmmadmb(cbind(v,i)~momdadcross, data=pol, family="betabinomial")
sc.mix.mompop.betabin.admb <- glmmadmb(cbind(v,i)~momdadcross + (1|mompop), data=pol, family="betabinomial")
sc.mix.momdadpop.betabin.admb <- glmmadmb(cbind(v,i)~momdadcross + (1|mompop) + (1|dadpop), data=pol, family="betabinomial")
sc.mix.mompid.betabin.admb <- glmmadmb(cbind(v,i)~momdadcross + (1|mompid), data=pol, family="betabinomial")
sc.mix.momdadpid.betabin.admb <- glmmadmb(cbind(v,i)~momdadcross + (1|mompid)+ (1|dadpid), data=pol, family="betabinomial")
sc.mix.momdadpop.bin.admb <- glmmadmb(cbind(v,i)~momdadcross + (1|mompop) + (1|dadpop), data=pol, family="binomial")
sc.mix.mompid.bin.admb <- glmmadmb(cbind(v,i)~momdadcross + (1|mompid), data=pol, family="binomial")
sc.bin.admb <- glmmadmb(cbind(v,i)~momdadcross, data=pol, family="binomial")
We will use the Aikake Information Criterion to pick the model the best fits the data, penalized by the number of parameters. Differences of 2 units are significant.
# AICtab(sc.b, sc.mix.mompop.b, sc.mix.mompid.b)
sc.admb.names <- c("sc.betabin.admb", "sc.mix.mompop.betabin.admb","sc.mix.momdadpop.betabin.admb", "sc.mix.mompid.betabin.admb", "sc.mix.momdadpop.bin.admb","sc.mix.momdadpid.betabin.admb", "sc.mix.mompid.bin.admb", "sc.bin.admb")
sc.names <- c("sc.bin", "sc.mix.mompid.bin","sc.mix.momdadpid.bin", "sc.mix.mompop.bin","sc.mix.momdadpop.bin", "sc.mix.momdadpop.olre.bin", sc.admb.names)
sc.list <- sapply(sc.names, get, USE.NAMES=T)
sc.AIC <- ICtab(sc.list,mnames=sc.names,type="AIC", base=T, delta=F) # for AICc, nobs=nobs(sc.list[[1]])
class(sc.AIC)<-"data.frame"
all.names <- c(sc.names)
all.list <- sapply(all.names, get, USE.NAMES=T)
all.AIC <- dfun(rbind(sc.AIC))
all.AIC <- all.AIC[order(all.AIC$dAIC),]
kable(all.AIC, format.arg=list(digits=3))
| dAIC | df | |
|---|---|---|
| sc.mix.momdadpop.olre.bin | 0.0 | 17 |
| sc.mix.mompid.betabin.admb | 16.9 | 16 |
| sc.mix.mompop.betabin.admb | 18.2 | 16 |
| sc.mix.momdadpid.betabin.admb | 18.5 | 17 |
| sc.mix.momdadpop.betabin.admb | 20.2 | 17 |
| sc.betabin.admb | 28.6 | 15 |
| sc.mix.momdadpid.bin | 13525.7 | 16 |
| sc.mix.mompid.bin.admb | 20220.7 | 15 |
| sc.mix.mompid.bin | 20220.8 | 15 |
| sc.mix.momdadpop.bin.admb | 23942.7 | 16 |
| sc.mix.momdadpop.bin | 23942.7 | 16 |
| sc.mix.mompop.bin | 25137.7 | 15 |
| sc.bin | 28890.5 | 14 |
| sc.bin.admb | 28890.5 | 14 |
od <- as.data.frame(t(sapply(sc.list[-c(1,7)], overdisp)))
od$logratio <- log(od$ratio)
od[order(od$logratio),]
chisq ratio rdf p logratio
sc.mix.momdadpop.olre.bin 11.80061 0.04027513 293 1 -3.212021
sc.mix.momdadpid.bin 14797.57398 50.33188429 294 0 3.918639
sc.mix.mompid.bin.admb 21960.21463 74.44140554 295 0 4.310012
sc.mix.mompid.bin 21960.32647 74.44178465 295 0 4.310017
sc.mix.momdadpid.betabin.admb 26101.38918 89.08323953 293 0 4.489571
sc.mix.momdadpop.bin 26280.53875 89.38958759 294 0 4.493004
sc.mix.momdadpop.bin.admb 26280.71604 89.39019060 294 0 4.493011
sc.mix.mompid.betabin.admb 27044.92100 91.98952721 294 0 4.521675
sc.mix.mompop.bin 27139.14074 91.99708727 295 0 4.521757
sc.mix.mompop.betabin.admb 28597.20835 97.26941615 294 0 4.577485
sc.mix.momdadpop.betabin.admb 28597.21571 97.60141881 293 0 4.580892
sc.bin.admb 30569.48433 103.27528491 296 0 4.637398
Looking at the normal, fixed effects model, we see that the residuals are not normal:
shapiro.test(sc.bin$residuals)# raw residuals!
Shapiro-Wilk normality test
data: sc.bin$residuals
W = 0.92404, p-value = 1.875e-11
The coefficients estimated for each model agree qualitatively.
sc.log.names <- sc.names[-c(7:14)]
sc.log <- sapply(sc.log.names, get, USE.NAMES=T)
coefplot2(sc.log, legend.x="topright",legend=T,legend.args=list(cex=0.8, xpd=T, inset=c(-0.1,0)), col.pts=sample(gg_color_hue(length(sc.log.names))), spacing=0.05, lwd.2=2, lwd.1=4, intercept=T)
sc.admb <- sapply(sc.admb.names, get, USE.NAMES=T)
coefplot2(sc.admb, legend.x="topright",legend=T,legend.args=list(cex=0.8, xpd=T, inset=c(-0.1,0)), col.pts=sample(gg_color_hue(length(sc.admb.names))), spacing=0.05, lwd.2=2, lwd.1=4, intercept=T)
sc.log.names <- c("sc.mix.mompid.betabin.tmb", "sc.mix.mompid.betabin.admb","sc.mix.mompid.betabin.hglm", "sc.mix.mompid.betabin.spamm")
sc.log <- sapply(sc.log.names, get, USE.NAMES=T)
sc.log.coef <- list(
coef(summary(sc.log[[1]]))[[1]],
coef(summary(sc.log[[2]])),
summary(sc.log[[3]])$ FixCoefMat,
summary(sc.log[[4]])$beta_table)
formula: cbind(v, i) ~ momdadcross + (1 | mompid)
Estimation of lambda by Laplace REML approximation (p_bv).
Estimation of fixed effects by h-likelihood approximation.
Family: binomial ( link = logit )
------------ Fixed effects (beta) ------------
Estimate Cond. SE t-value
(Intercept) 2.6188 0.27347 9.576
momdadcrossHK.H -2.3559 0.36620 -6.433
momdadcrossKH.H -2.6812 0.33233 -8.068
momdadcrossK.H -2.7942 0.34326 -8.140
momdadcrossH.HK -2.4209 0.36805 -6.578
momdadcrossHK.HK -2.9510 0.36845 -8.009
momdadcrossK.HK -2.0926 0.36161 -5.787
momdadcrossH.KH -1.9040 0.36786 -5.176
momdadcrossKH.KH -2.7020 0.33316 -8.110
momdadcrossK.KH -2.2229 0.35981 -6.178
momdadcrossH.K -2.2642 0.03512 -64.470
momdadcrossHK.K -3.3392 0.36809 -9.072
momdadcrossKH.K -2.7063 0.33282 -8.132
momdadcrossK.K -0.9012 0.34332 -2.625
--------------- Random effects ---------------
Family: Beta ( link = logit )
--- Variance parameters ('lambda'):
lambda = 4 var(u)/(1 - 4 var(u)) for u ~ Beta[1/(2*lambda),1/(2*lambda)];
mompid : 0.1293
--- Coefficients for log(lambda):
Group Term Estimate Cond.SE
mompid (Intercept) -2.046 0.1725
# of obs: 310; # of groups: mompid, 70
------------- Likelihood values -------------
logLik
h-likelihood: -11726.35
p_v(h) (marginal L): -11847.67
p_beta,v(h) (ReL): -11868.04
colnames(sc.log.coef[[4]]) <- colnames(sc.log.coef[[3]])[1:3]
library(data.table)
sc.log.coef.all <- rbindlist(setNames(lapply(lapply(sc.log.coef, as.data.frame), setDT, keep.rownames = TRUE), sc.log.names), idcol="modelName", fill=T)
setnames(sc.log.coef.all, 2:4, c("Variable","Coefficient","SE"))
# Specify the width of your confidence intervals
interval1 <- -qnorm((1-0.9)/2) # 90% multiplier
interval2 <- -qnorm((1-0.95)/2) # 95% multiplier
# Plot
ggplot(sc.log.coef.all, aes(colour = modelName))+
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
geom_linerange(aes(x = Variable, ymin = Coefficient - SE*interval1,
ymax = Coefficient + SE*interval1),
lwd = 1, position = position_dodge(width = 1/2))+
geom_pointrange(aes(x = Variable, y = Coefficient, ymin = Coefficient - SE*interval2,
ymax = Coefficient + SE*interval2),
lwd = 1/2, position = position_dodge(width = 1/2),
shape = 21, fill = "WHITE")+
coord_flip() + theme_bw()
mod <- sc.mix.mompid.betabin.admb
print(mod)
GLMM's in R powered by AD Model Builder:
Family: betabinom
alpha = 4.9571
link = logit
Fixed effects:
Log-likelihood: -1744.9
AIC: 3521.8
Formula: cbind(v, i) ~ momdadcross + (1 | mompid)
(Intercept) momdadcrossHK.H momdadcrossKH.H momdadcrossK.H
1.880763 -1.644447 -1.997737 -2.190016
momdadcrossH.HK momdadcrossHK.HK momdadcrossK.HK momdadcrossH.KH
-1.814675 -2.400678 -1.233611 -1.562228
momdadcrossKH.KH momdadcrossK.KH momdadcrossH.K momdadcrossHK.K
-2.346534 -2.013672 -1.937335 -2.761363
momdadcrossKH.K momdadcrossK.K
-2.206468 -0.219586
Random effects:
Structure: Diagonal matrix
Group=mompid
Variance StdDev
(Intercept) 0.08407 0.2899
Number of observations: total=310, mompid=70
The model estimated the following parameters, with individual parameter significance determined by the Wald z-test, and fixed effect significance determined by analysis of deviance Wald test.
summary(mod)
Call:
glmmadmb(formula = cbind(v, i) ~ momdadcross + (1 | mompid),
data = pol, family = "betabinomial")
AIC: 3521.8
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.881 0.218 8.64 < 2e-16 ***
momdadcrossHK.H -1.644 0.366 -4.50 6.9e-06 ***
momdadcrossKH.H -1.998 0.311 -6.42 1.4e-10 ***
momdadcrossK.H -2.190 0.266 -8.22 < 2e-16 ***
momdadcrossH.HK -1.815 0.332 -5.47 4.5e-08 ***
momdadcrossHK.HK -2.401 0.423 -5.67 1.4e-08 ***
momdadcrossK.HK -1.234 0.404 -3.06 0.0022 **
momdadcrossH.KH -1.562 0.315 -4.96 6.9e-07 ***
momdadcrossKH.KH -2.347 0.336 -6.98 3.0e-12 ***
momdadcrossK.KH -2.014 0.355 -5.66 1.5e-08 ***
momdadcrossH.K -1.937 0.217 -8.94 < 2e-16 ***
momdadcrossHK.K -2.761 0.435 -6.35 2.2e-10 ***
momdadcrossKH.K -2.206 0.331 -6.67 2.5e-11 ***
momdadcrossK.K -0.220 0.263 -0.83 0.4045
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Number of observations: total=310, mompid=70
Random effect variance(s):
Group=mompid
Variance StdDev
(Intercept) 0.08407 0.2899
Beta-binomial dispersion parameter: 4.9571 (std. err.: 0.42775)
Log-likelihood: -1744.9
qqnorm(resid(mod), main="normal qq-plot, residuals")
qqline(resid(mod))
qqnorm(ranef(mod)[[1]][,1])
qqline(ranef(mod)[[1]][,1])
plot(fitted(mod), resid(mod)) #residuals vs fitted
abline(h=0)
pol$fitted <- fitted(mod) #fitted vs observed
plot(pol$fitted, jitter(pol$vp,0.1))
abline(0,1)
The estimated marginal e means procedure can generate predictor estimates of each type, and give their significance groupings with a post-hoc Tukey test.
# Count
rg <- ref_grid(mod)
# summary(rg)
#sxc.lsm <- emmeans(rg, ~ momcross*dadcross)
sxc.lsm <- emmeans(rg, ~ momdadcross)
plot(sxc.lsm, comparisons=T)
CLD(sxc.lsm, Letters=letters)
momdadcross emmean SE df asymp.LCL asymp.UCL .group
HK.K -0.88059990 0.4645445 Inf -1.7910904 0.02989056 a
HK.HK -0.51991559 0.4554523 Inf -1.4125857 0.37275451 a
KH.KH -0.46577137 0.3723186 Inf -1.1955025 0.26395973 a
KH.K -0.32570489 0.3624595 Inf -1.0361125 0.38470270 a
K.H -0.30925353 0.3364231 Inf -0.9686307 0.35012360 a
K.KH -0.13290920 0.3861059 Inf -0.8896628 0.62384439 a
KH.H -0.11697446 0.3555303 Inf -0.8138011 0.57985216 a
H.K -0.05657235 0.3137066 Inf -0.6714260 0.55828133 a
H.HK 0.06608814 0.3786250 Inf -0.6760032 0.80817950 a
HK.H 0.23631529 0.4061025 Inf -0.5596309 1.03226149 ab
H.KH 0.31853514 0.3694976 Inf -0.4056668 1.04273710 a
K.HK 0.64715132 0.4387894 Inf -0.2128601 1.50716270 abc
K.K 1.66117672 0.3284526 Inf 1.0174215 2.30493200 bc
H.H 1.88076275 0.2176200 Inf 1.4542354 2.30729011 c
Results are given on the logit (not the response) scale.
Confidence level used: 0.95
Results are given on the log odds ratio (not the response) scale.
P value adjustment: tukey method for comparing a family of 14 estimates
significance level used: alpha = 0.05
cld.mod <- cld(sxc.lsm, Letters=letters) # tukey letterings
library(boot)
Attaching package: 'boot'
The following object is masked from 'package:lattice':
melanoma
The following object is masked from 'package:car':
logit
cld.mod$response <- inv.logit(cld.mod$emmean)
cld.mod$uSE <- inv.logit(cld.mod$emmean+cld.mod$SE)
cld.mod$lSE <- inv.logit(cld.mod$emmean-cld.mod$SE)
options(digits=4)
cld.mod[c(15,16),] <- NA
cld.mod$momdadcross <- as.character(cld.mod$momdadcross)
cld.mod[c(15,16),1] <- c("HK.KH","KH.HK")
cld.mod$momdadcross <- as.factor(cld.mod$momdadcross)
cld.mod <- cbind(cld.mod, lapply(read.table(text = as.character(cld.mod$momdadcross), sep = ".", col.names=c("momcross","dadcross")), factor, levels=levels(pol$momcross)))
cld.mod[rev(order(cld.mod$momcross, cld.mod$dadcross)),]
momdadcross emmean SE df asymp.LCL asymp.UCL .group response
14 K.K 1.66118 0.3285 Inf 1.0174 2.30493 bc 0.8404
10 K.KH -0.13291 0.3861 Inf -0.8897 0.62384 a 0.4668
7 K.HK 0.64715 0.4388 Inf -0.2129 1.50716 abc 0.6564
4 K.H -0.30925 0.3364 Inf -0.9686 0.35012 a 0.4233
13 KH.K -0.32570 0.3625 Inf -1.0361 0.38470 a 0.4193
9 KH.KH -0.46577 0.3723 Inf -1.1955 0.26396 a 0.3856
16 KH.HK NA NA NA NA NA <NA> NA
3 KH.H -0.11697 0.3555 Inf -0.8138 0.57985 a 0.4708
12 HK.K -0.88060 0.4645 Inf -1.7911 0.02989 a 0.2931
15 HK.KH NA NA NA NA NA <NA> NA
6 HK.HK -0.51992 0.4555 Inf -1.4126 0.37275 a 0.3729
2 HK.H 0.23632 0.4061 Inf -0.5596 1.03226 ab 0.5588
11 H.K -0.05657 0.3137 Inf -0.6714 0.55828 a 0.4859
8 H.KH 0.31854 0.3695 Inf -0.4057 1.04274 a 0.5790
5 H.HK 0.06609 0.3786 Inf -0.6760 0.80818 a 0.5165
1 H.H 1.88076 0.2176 Inf 1.4542 2.30729 c 0.8677
uSE lSE momcross dadcross
14 0.8797 0.7913 K K
10 0.5630 0.3731 K KH
7 0.7476 0.5519 K HK
4 0.5068 0.3440 K H
13 0.5092 0.3344 KH K
9 0.4767 0.3019 KH KH
16 NA NA KH HK
3 0.5594 0.3840 KH H
12 0.3975 0.2067 HK K
15 NA NA HK KH
6 0.4839 0.2738 HK HK
2 0.6553 0.4577 HK H
11 0.5639 0.4085 H K
8 0.6655 0.4873 H KH
5 0.6094 0.4225 H HK
1 0.8907 0.8407 H H
ggplot(as.data.frame(cld.mod), aes(y=response, x=momcross, fill=dadcross)) +
geom_col(position=position_dodge2()) +
geom_linerange(aes(ymin=lSE, ymax=uSE), position=position_dodge2(0.9)) +
labs(x="Maternal plant", y="Pollen viability") +
geom_text(aes(label=trimws(.group), x=momcross, hjust=length(trimws(.group))/30), position=position_dodge2(0.9), vjust=-1) +
geom_text(aes(label=dadcross, y=0.05), position=position_dodge2(0.9)) +
scale_y_continuous(expand = expand_scale(mult=c(0,0)), breaks = scales::pretty_breaks(n = 5), limits=c(0,1)) +
scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + guides(fill=F)
Expect difference:
No difference:
K = matrix(rep(0,14*4), 4)
colnames(K) <- substr(names(coef(mod)),12,16)
K[1,c("HK.K","K.HK")] <- K[2,c("KH.H","H.KH")] <- K[3,c("HK.H","H.HK")] <- K[4,c("KH.K","K.KH")] <- c(1, -1)
rownames(K) <- c("HK x K - K x HK","KH x H - H x KH","HK x H - H x HK","KH x K - K x HK")
library(multcomp)
summary(glht(mod, linfct=K),test=adjusted("none"))
Simultaneous Tests for General Linear Hypotheses
Fit: glmmadmb(formula = cbind(v, i) ~ momdadcross + (1 | mompid),
data = pol, family = "betabinomial")
Linear Hypotheses:
Estimate Std. Error z value Pr(>|z|)
HK x K - K x HK == 0 -1.5278 0.5859 -2.608 0.00911 **
KH x H - H x KH == 0 -0.4355 0.4394 -0.991 0.32165
HK x H - H x HK == 0 0.1702 0.4896 0.348 0.72808
KH x K - K x HK == 0 -0.1928 0.4683 -0.412 0.68058
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- none method)
#diffdiff <- matrix(K[1,]+K[2,]-K[3,]-K[4,],1)
#summary(glht(sc.mix.mompid.betabin.admb, linfct=diffdiff))
custom.emmc <- function(levels) as.data.frame(t(K))
contrast(sxc.lsm, "custom", adjust="none")
contrast estimate SE df z.ratio p.value
HK x K - K x HK -1.5277512 0.5858552 Inf -2.608 0.0091
KH x H - H x KH -0.4355096 0.4394330 Inf -0.991 0.3217
HK x H - H x HK 0.1702271 0.4896130 Inf 0.348 0.7281
KH x K - K x HK -0.1927957 0.4683255 Inf -0.412 0.6806
Results are given on the log odds ratio (not the response) scale.
contrast(sxc.lsm, "custom", adjust="none", type="response")
contrast odds.ratio SE df z.ratio p.value
HK x K / K x HK 0.2170232 0.1271441 Inf -2.608 0.0091
KH x H / H x KH 0.6469349 0.2842846 Inf -0.991 0.3217
HK x H / H x HK 1.1855741 0.5804726 Inf 0.348 0.7281
KH x K / K x HK 0.8246504 0.3862049 Inf -0.412 0.6806
Tests are performed on the log odds ratio scale
(em_cont <- ggplot(as.data.frame(cld.mod[cld.mod$momdadcross %in% c("HK.K","K.HK","KH.H","H.KH","HK.H","H.HK","KH.K","K.KH"),]), aes(y=response, x=momcross, fill=dadcross)) +
geom_col(position=position_dodge2()) +
geom_linerange(aes(ymin=lSE, ymax=uSE), position=position_dodge2(0.9)) +
labs(x="Maternal plant", y="Pollen viability") +
geom_text(aes(label=dadcross, y=0.05), position=position_dodge2(0.9)) +
scale_y_continuous(expand = expand_scale(mult=c(0,0)), breaks = scales::pretty_breaks(n = 5), limits=c(0,1)) +
scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + guides(fill=F))
(bothplot <- ggplot(pol, aes(y=tpf, x=dadcross, fill=dadcross)) +
facet_wrap(vars(momcross), nrow=1)+ ggtitle("Maternal parent") +
geom_boxplot() + geom_point(aes(color=mompop)) + scale_color_discrete("Maternal population",na.value="grey20") +
labs(x="Paternal parent", y="Pollen grains per flower") +
scale_fill_manual("Paternal plant", values=c("grey90","gray70","gray50","gray30"))+
theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank(), plot.title=element_text(hjust=0.5, size=rel(1)))+guides(fill=F))
ggplot(aes(y=tpf, x=mompid, color=dadcross), data=pol) + geom_point() + coord_flip() + labs(x="Maternal plant", y="Pollen grains per flower")+theme_minimal()
sc <- lm(tpf~momdadcross, data=pol)
sc.mix.mompop <- lmer(tpf~momdadcross + (1|mompop), data=pol)
sc.mix.momdadpop <- lmer(tpf~momdadcross + (1|mompop) + (1|dadpop), data=pol)
sc.mix.mompid <- lmer(tpf~momdadcross + (1|mompid), data=pol)
sc.mix.momdadpid <- lmer(tpf~momdadcross + (1|mompid) + (1|dadpid), data=pol)
We will use the Aikake Information Criterion to pick the model the best fits the data, penalized by the number of parameters. Differences of 2 units are significant.
# AICtab(sc.b, sc.mix.mompop.b, sc.mix.mompid.b)
sc.names <- c("sc", "sc.mix.mompid","sc.mix.momdadpid", "sc.mix.mompop","sc.mix.momdadpop")
sc.list <- sapply(sc.names, get, USE.NAMES=T)
sc.AIC <- ICtab(sc.list,mnames=sc.names,type="AIC", base=T, delta=F) # for AICc, nobs=nobs(sc.list[[1]])
class(sc.AIC)<-"data.frame"
all.names <- c(sc.names)
all.list <- sapply(all.names, get, USE.NAMES=T)
all.AIC <- dfun(rbind(sc.AIC))
all.AIC <- all.AIC[order(all.AIC$dAIC),]
kable(all.AIC, format.arg=list(digits=3))
| dAIC | df | |
|---|---|---|
| sc.mix.momdadpid | 0.00 | 17 |
| sc.mix.mompid | 2.69 | 16 |
| sc.mix.mompop | 6.89 | 16 |
| sc.mix.momdadpop | 7.08 | 17 |
| sc | 219.50 | 15 |
Looking at the normal, fixed effects model, we see that the residuals are not normal:
shapiro.test(sc$residuals)# raw residuals!
Shapiro-Wilk normality test
data: sc$residuals
W = 0.99, p-value = 0.006
The coefficients estimated for each model agree qualitatively.
sc.log.names <- sc.names
sc.log <- sapply(sc.log.names, get, USE.NAMES=T)
coefplot2(sc.log, legend.x="topright",legend=T,legend.args=list(cex=0.8, xpd=T, inset=c(-0.1,0)), col.pts=sample(gg_color_hue(length(sc.log.names))), spacing=0.05, lwd.2=2, lwd.1=4, intercept=T)
mod <- sc.mix.momdadpid
print(mod)
Linear mixed model fit by REML ['lmerMod']
Formula: tpf ~ momdadcross + (1 | mompid) + (1 | dadpid)
Data: pol
REML criterion at convergence: 5693.565
Random effects:
Groups Name Std.Dev.
dadpid (Intercept) 1119
mompid (Intercept) 1118
Residual 3161
Number of obs: 310, groups: dadpid, 84; mompid, 70
Fixed Effects:
(Intercept) momdadcrossHK.H momdadcrossKH.H momdadcrossK.H
10411.6 -741.1 -3462.0 -1008.7
momdadcrossH.HK momdadcrossHK.HK momdadcrossK.HK momdadcrossH.KH
-224.6 -2775.8 -448.1 -325.7
momdadcrossKH.KH momdadcrossK.KH momdadcrossH.K momdadcrossHK.K
-4506.1 1731.6 1627.8 -1168.2
momdadcrossKH.K momdadcrossK.K
-3071.8 1242.4
The model estimated the following parameters, with individual parameter significance determined by the Wald z-test, and fixed effect significance determined by analysis of deviance Wald test.
summary(mod)
Linear mixed model fit by REML ['lmerMod']
Formula: tpf ~ momcross * dadcross + (1 | mompid) + (1 | dadpid)
Data: pol
REML criterion at convergence: 5694
Scaled residuals:
Min 1Q Median 3Q Max
-2.856 -0.556 -0.061 0.570 3.499
Random effects:
Groups Name Variance Std.Dev.
dadpid (Intercept) 1251575 1119
mompid (Intercept) 1250369 1118
Residual 9991798 3161
Number of obs: 310, groups: dadpid, 84; mompid, 70
Fixed effects:
Estimate Std. Error t value
(Intercept) 10412 881 11.82
momcrossHK -741 1542 -0.48
momcrossKH -3462 1364 -2.54
momcrossK -1009 1026 -0.98
dadcrossHK -225 1360 -0.17
dadcrossKH -326 1272 -0.26
dadcrossK 1628 961 1.69
momcrossHK:dadcrossHK -1810 2296 -0.79
momcrossK:dadcrossHK 785 1920 0.41
momcrossKH:dadcrossKH -718 1867 -0.38
momcrossK:dadcrossKH 3066 1753 1.75
momcrossHK:dadcrossK -2055 2096 -0.98
momcrossKH:dadcrossK -1238 1708 -0.72
momcrossK:dadcrossK 623 1086 0.57
Correlation matrix not shown by default, as p = 14 > 12.
Use print(x, correlation=TRUE) or
vcov(x) if you need it
fit warnings:
fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
The least square means procedure can generate predictor estimates of each type, and give their significance groupings with a post-hoc Tukey test.
# Count
rg <- ref_grid(mod)
# summary(rg)
sxc.lsm <- emmeans(rg, ~ momdadcross)
plot(sxc.lsm)
cld.mod <- cld(sxc.lsm, Letters=letters) # tukey letterings
cld.mod[c(15,16),] <- NA
cld.mod$momdadcross <- as.character(cld.mod$momdadcross)
cld.mod[c(15,16),1] <- c("HK.KH","KH.HK")
cld.mod$momdadcross <- as.factor(cld.mod$momdadcross)
cld.mod <- cbind(cld.mod, lapply(read.table(text = as.character(cld.mod$momdadcross), sep = ".", col.names=c("momcross","dadcross")), factor, levels=levels(pol$momcross)))
options(digits=4)
cld.mod[rev(order(cld.mod$momcross, cld.mod$dadcross)),]
momdadcross emmean SE df lower.CL upper.CL .group momcross
14 K.K 11654 680.0 40.18 10280 13028 c K
10 K.KH 12143 1137.4 219.03 9902 14385 bc K
7 K.HK 9963 1334.0 250.74 7336 12591 abc K
4 K.H 9403 806.4 33.93 7764 11042 abc K
13 KH.K 7340 1039.2 148.02 5286 9393 ab KH
9 KH.KH 5905 979.0 247.88 3977 7834 a KH
16 KH.HK NA NA NA NA NA <NA> KH
3 KH.H 6950 1052.4 57.22 4842 9057 ab KH
12 HK.K 9243 1464.7 216.68 6357 12130 abc HK
15 HK.KH NA NA NA NA NA <NA> HK
6 HK.HK 7636 1441.1 247.99 4797 10474 abc HK
2 HK.H 9671 1275.9 135.42 7147 12194 abc HK
11 H.K 12039 738.0 31.34 10535 13544 c H
8 H.KH 10086 930.2 76.71 8234 11938 abc H
5 H.HK 10187 1041.1 171.14 8132 12242 abc H
1 H.H 10412 890.2 47.61 8621 12202 abc H
dadcross
14 K
10 KH
7 HK
4 H
13 K
9 KH
16 HK
3 H
12 K
15 KH
6 HK
2 H
11 K
8 KH
5 HK
1 H
ggplot(as.data.frame(cld.mod), aes(y=emmean, x=momcross, fill=dadcross)) +
geom_col(position=position_dodge2()) +
geom_linerange(aes(ymin=emmean-SE, ymax=emmean+SE), position=position_dodge2(0.9)) +
labs(x="Maternal plant", y="Pollen grains per flower") +
geom_text(aes(label=trimws(.group), x=momcross, hjust=length(trimws(.group))/30), position=position_dodge2(0.9), vjust=-3) +
geom_text(aes(label=dadcross, y=300), position=position_dodge2(0.9)) +
scale_y_continuous(expand = expand_scale(mult=c(0,0.05))) +
scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + guides(fill=F)
Expect difference:
No difference:
K = matrix(rep(0,14*4), 4)
colnames(K) <- substr(names(coef(summary(mod))[,1]),12,16)
K[1,c("HK.K","K.HK")] <- K[2,c("KH.H","H.KH")] <- K[3,c("HK.H","H.HK")] <- K[4,c("KH.K","K.KH")] <- c(1, -1)
rownames(K) <- c("HK x K - K x HK","KH x H - H x KH","HK x H - H x HK","KH x K - K x HK")
library(multcomp)
summary(glht(mod, linfct=K),test=adjusted("none"))
Simultaneous Tests for General Linear Hypotheses
Fit: lmer(formula = tpf ~ momdadcross + (1 | mompid) + (1 | dadpid),
data = pol)
Linear Hypotheses:
Estimate Std. Error z value Pr(>|z|)
HK x K - K x HK == 0 -720 1976 -0.36 0.7156
KH x H - H x KH == 0 -3136 1388 -2.26 0.0239 *
HK x H - H x HK == 0 -516 1635 -0.32 0.7522
KH x K - K x HK == 0 -4803 1535 -3.13 0.0018 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- none method)
#diffdiff <- matrix(K[1,]+K[2,]-K[3,]-K[4,],1)
#summary(glht(sc.mix.mompid.betabin.admb, linfct=diffdiff))
custom.emmc <- function(levels) as.data.frame(t(K))
contrast(sxc.lsm, "custom", adjust="none")
contrast estimate SE df t.ratio p.value
HK x K - K x HK -720.1 1981 233.55 -0.363 0.7166
KH x H - H x KH -3136.3 1405 88.31 -2.233 0.0281
HK x H - H x HK -516.4 1647 158.30 -0.314 0.7543
KH x K - K x HK -4803.4 1541 195.72 -3.117 0.0021
contrast(sxc.lsm, "custom", adjust="none", type="response")
contrast estimate SE df t.ratio p.value
HK x K - K x HK -720.1 1981 233.55 -0.363 0.7166
KH x H - H x KH -3136.3 1405 88.31 -2.233 0.0281
HK x H - H x HK -516.4 1647 158.30 -0.314 0.7543
KH x K - K x HK -4803.4 1541 195.72 -3.117 0.0021
ggplot(as.data.frame(cld.mod[cld.mod$momdadcross %in% c("HK.K","K.HK","KH.H","H.KH","HK.H","H.HK","KH.K","K.KH"),]), aes(y=emmean, x=momcross, fill=dadcross)) +
geom_col(position=position_dodge2()) +
geom_linerange(aes(ymin=emmean-SE, ymax=emmean+SE), position=position_dodge2(0.9)) +
labs(x="Maternal plant", y="Pollen grains per flower") +
geom_text(aes(label=dadcross, y=300), position=position_dodge2(0.9)) +
scale_y_continuous(expand = expand_scale(mult=c(0,0.05))) +
scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + guides(fill=F)
(bothplot <- ggplot(pol, aes(y=vpf, x=dadcross, fill=dadcross)) +
facet_wrap(vars(momcross), nrow=1)+ ggtitle("Maternal parent") +
geom_boxplot() + geom_point(aes(color=mompop)) + scale_color_discrete("Maternal population",na.value="grey20") +
labs(x="Paternal parent", y="Viable pollen grains per flower") +
scale_fill_manual("Paternal plant", values=c("grey90","gray70","gray50","gray30"))+
theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank(), plot.title=element_text(hjust=0.5, size=rel(1)))+guides(fill=F))
ggplot(aes(y=vpf, x=mompid, color=dadcross), data=pol) + geom_point() + coord_flip() + labs(x="Maternal plant", y="Viable pollen grains per flower")+theme_minimal()
sc <- lm(vpf~momdadcross, data=pol)
sc.mix.mompop <- lmer(vpf~momdadcross + (1|mompop), data=pol)
sc.mix.momdadpop <- lmer(vpf~momdadcross + (1|mompop) + (1|dadpop), data=pol)
sc.mix.mompid <- lmer(vpf~momdadcross + (1|mompid), data=pol)
sc.mix.momdadpid <- lmer(vpf~momdadcross + (1|mompid) + (1|dadpid), data=pol)
We will use the Aikake Information Criterion to pick the model the best fits the data, penalized by the number of parameters. Differences of 2 units are significant.
# AICtab(sc.b, sc.mix.mompop.b, sc.mix.mompid.b)
sc.names <- c("sc", "sc.mix.mompid","sc.mix.momdadpid", "sc.mix.mompop","sc.mix.momdadpop")
sc.list <- sapply(sc.names, get, USE.NAMES=T)
sc.AIC <- ICtab(sc.list,mnames=sc.names,type="AIC", base=T, delta=F) # for AICc, nobs=nobs(sc.list[[1]])
class(sc.AIC)<-"data.frame"
all.names <- c(sc.names)
all.list <- sapply(all.names, get, USE.NAMES=T)
all.AIC <- dfun(rbind(sc.AIC))
all.AIC <- all.AIC[order(all.AIC$dAIC),]
kable(all.AIC, format.arg=list(digits=3))
| dAIC | df | |
|---|---|---|
| sc.mix.momdadpop | 0.00 | 17 |
| sc.mix.mompop | 2.08 | 16 |
| sc.mix.momdadpid | 6.92 | 17 |
| sc.mix.mompid | 7.63 | 16 |
| sc | 224.43 | 15 |
Looking at the normal, fixed effects model, we see that the residuals are not normal:
shapiro.test(sc$residuals)# raw residuals!
Shapiro-Wilk normality test
data: sc$residuals
W = 0.97756, p-value = 8.94e-05
The coefficients estimated for each model agree qualitatively.
sc.log.names <- sc.names
sc.log <- sapply(sc.log.names, get, USE.NAMES=T)
coefplot2(sc.log, legend.x="topright",legend=T,legend.args=list(cex=0.8, xpd=T, inset=c(-0.1,0)), col.pts=sample(gg_color_hue(length(sc.log.names))), spacing=0.05, lwd.2=2, lwd.1=4, intercept=T)
mod_vpf <- sc
print(mod_vpf)
Call:
lm(formula = vpf ~ momdadcross, data = pol)
Coefficients:
(Intercept) momdadcrossHK.H momdadcrossKH.H momdadcrossK.H
9570 -3581 -6166 -5605
momdadcrossH.HK momdadcrossHK.HK momdadcrossK.HK momdadcrossH.KH
-4244 -6233 -2634 -3305
momdadcrossKH.KH momdadcrossK.KH momdadcrossH.K momdadcrossHK.K
-6111 -3193 -3570 -6696
momdadcrossKH.K momdadcrossK.K
-5835 317
The model estimated the following parameters, with individual parameter significance determined by the Wald z-test, and fixed effect significance determined by analysis of deviance Wald test.
summary(mod_vpf)
Call:
lm(formula = vpf ~ momdadcross, data = pol)
Residuals:
Min 1Q Median 3Q Max
-7976 -2450 -361 2074 13717
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 9570 626 15.28 < 2e-16 ***
momdadcrossHK.H -3581 1303 -2.75 0.00638 **
momdadcrossKH.H -6166 1062 -5.81 1.6e-08 ***
momdadcrossK.H -5605 795 -7.05 1.3e-11 ***
momdadcrossH.HK -4244 1139 -3.73 0.00023 ***
momdadcrossHK.HK -6234 1534 -4.06 6.2e-05 ***
momdadcrossK.HK -2634 1440 -1.83 0.06826 .
momdadcrossH.KH -3305 990 -3.34 0.00095 ***
momdadcrossKH.KH -6111 1139 -5.37 1.6e-07 ***
momdadcrossK.KH -3193 1252 -2.55 0.01128 *
momdadcrossH.K -3570 765 -4.67 4.6e-06 ***
momdadcrossHK.K -6696 1534 -4.37 1.8e-05 ***
momdadcrossKH.K -5836 1139 -5.12 5.4e-07 ***
momdadcrossK.K 317 774 0.41 0.68229
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3430 on 296 degrees of freedom
Multiple R-squared: 0.34, Adjusted R-squared: 0.311
F-statistic: 11.7 on 13 and 296 DF, p-value: <2e-16
The least square means procedure can generate predictor estimates of each type, and give their significance groupings with a post-hoc Tukey test.
# Count
rg <- ref_grid(mod_vpf)
# summary(rg)
sxc.lsm <- emmeans(rg, ~ momdadcross)
plot(sxc.lsm)
cld.mod <- cld(sxc.lsm, Letters=letters) # tukey letterings
cld.mod[c(15,16),] <- NA
cld.mod$momdadcross <- as.character(cld.mod$momdadcross)
cld.mod[c(15,16),1] <- c("HK.KH","KH.HK")
cld.mod$momdadcross <- as.factor(cld.mod$momdadcross)
cld.mod <- cbind(cld.mod, lapply(read.table(text = as.character(cld.mod$momdadcross), sep = ".", col.names=c("momcross","dadcross")), factor, levels=levels(pol$momcross)))
options(digits=4)
cld.mod[rev(order(cld.mod$momcross, cld.mod$dadcross)),]
momdadcross emmean SE df lower.CL upper.CL .group momcross
14 K.K 9887 454.3 296 8993.1 10781 c K
10 K.KH 6377 1084.5 296 4242.8 8511 abc K
7 K.HK 6936 1296.2 296 4384.8 9487 abc K
4 K.H 3965 489.9 296 3000.9 4929 a K
13 KH.K 3735 951.2 296 1862.7 5607 a KH
9 KH.KH 3459 951.2 296 1587.6 5331 a KH
16 KH.HK NA NA NA NA NA <NA> KH
3 KH.H 3404 857.4 296 1716.5 5091 a KH
12 HK.K 2874 1400.1 296 118.4 5629 a HK
15 HK.KH NA NA NA NA NA <NA> HK
6 HK.HK 3337 1400.1 296 581.3 6092 a HK
2 HK.H 5990 1143.2 296 3739.7 8239 abc HK
11 H.K 6000 439.1 296 5136.0 6864 a H
8 H.KH 6265 766.9 296 4755.7 7774 ab H
5 H.HK 5326 951.2 296 3454.0 7198 a H
1 H.H 9570 626.1 296 8337.9 10802 bc H
dadcross
14 K
10 KH
7 HK
4 H
13 K
9 KH
16 HK
3 H
12 K
15 KH
6 HK
2 H
11 K
8 KH
5 HK
1 H
ggplot(as.data.frame(cld.mod), aes(y=emmean, x=momcross, fill=dadcross)) +
geom_col(position=position_dodge2()) +
geom_linerange(aes(ymin=emmean-SE, ymax=emmean+SE), position=position_dodge2(0.9)) +
labs(x="Maternal plant", y="Viable pollen grains per flower") +
geom_text(aes(label=trimws(.group), x=momcross, hjust=length(trimws(.group))/30), position=position_dodge2(0.9), vjust=-3) +
geom_text(aes(label=dadcross, y=300), position=position_dodge2(0.9)) +
scale_y_continuous(expand = expand_scale(mult=c(0,0.05))) +
scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) +
guides(fill=F)
Expect difference:
No difference:
K = matrix(rep(0,14*4), 4)
colnames(K) <- substr(names(coef(mod_vpf)),12,16)
K[1,c("HK.K","K.HK")] <- K[2,c("KH.H","H.KH")] <- K[3,c("HK.H","H.HK")] <- K[4,c("KH.K","K.KH")] <- c(1, -1)
rownames(K) <- c("HK x K - K x HK","KH x H - H x KH","HK x H - H x HK","KH x K - K x HK")
library(multcomp)
summary(glht(mod_vpf, linfct=K),test=adjusted("none"))
Simultaneous Tests for General Linear Hypotheses
Fit: lm(formula = vpf ~ momdadcross, data = pol)
Linear Hypotheses:
Estimate Std. Error t value Pr(>|t|)
HK x K - K x HK == 0 -4062 1908 -2.13 0.034 *
KH x H - H x KH == 0 -2861 1150 -2.49 0.013 *
HK x H - H x HK == 0 664 1487 0.45 0.656
KH x K - K x HK == 0 -2642 1442 -1.83 0.068 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- none method)
#diffdiff <- matrix(K[1,]+K[2,]-K[3,]-K[4,],1)
#summary(glht(sc.mix.mompid.betabin.admb, linfct=diffdiff))
custom.emmc <- function(levels) as.data.frame(t(K))
contrast(sxc.lsm, "custom", adjust="none")
contrast estimate SE df t.ratio p.value
HK x K - K x HK -4062.0 1908 296 -2.129 0.0341
KH x H - H x KH -2861.1 1150 296 -2.487 0.0134
HK x H - H x HK 663.6 1487 296 0.446 0.6558
KH x K - K x HK -2642.5 1443 296 -1.832 0.0680
ggplot(as.data.frame(cld.mod[cld.mod$momdadcross %in% c("HK.K","K.HK","KH.H","H.KH","HK.H","H.HK","KH.K","K.KH"),]), aes(y=emmean, x=momcross, fill=dadcross)) +
geom_col(position=position_dodge2()) +
geom_linerange(aes(ymin=emmean-SE, ymax=emmean+SE), position=position_dodge2(0.9)) +
labs(x="Maternal plant", y="Viable pollen grains per flower") +
geom_text(aes(label=dadcross, y=300), position=position_dodge2(0.9)) +
scale_y_continuous(expand = expand_scale(mult=c(0,0))) +
scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) +
guides(fill=F)