Modified from hybridgerm.Rmd (Oct 2017)
Identify reproductive barriers between two sympatric moth-fflinated plant species, Schiedea kaalae and S. hookeri by fitting a generalized linear mixed model (GLMM).
In the experimental design, the following crosstypes were made:
In this analysis the response variable is the fflen viability of each cross. Other barriers (hybrid survival, flowering) could be analyzed in a similar framework, with appropriate changes to the underlying distribution.
Fixed effects:
Potential random effects:
ff <- read.table("firstflower6.csv", header=T, sep="\t")
ff$firstflower.date[ff$firstflower.date==""] <- NA
ff$firstflower.date[ff$use.firstflower!="yes"] <- NA
ff$firstflower.date <- as.Date(ff$firstflower.date)
ff <- ff[ff$crossid!=107,] ##FIND OUT WHAT CROSS THIS IS
ff$alive[ff$use.alive.flowered!="yes"] <- NA
ff <- ff[!is.na(ff$alive),]
ff$firstflower <- as.integer(round(difftime(ff$firstflower.date, "2016-03-01")))
ff$alive <- ff$alive=="yes"
crosses <- read.table("hybrids.csv", header=T, sep="\t", colClasses=c(mompop="factor", dadpop="factor"))
crosscol <- c("green","blue","orange","red")
#treat populations as factors
ff$mompop <- crosses$mompop[match(ff$crossid, crosses$crossid)]
ff$momid <- crosses$momid[match(ff$crossid, crosses$crossid)]
ff$species <- crosses$momsp[match(ff$crossid, crosses$crossid)]
ff$dadpop <- crosses$dadpop[match(ff$crossid, crosses$crossid)]
ff$dadid <- crosses$dadid[match(ff$crossid, crosses$crossid)]
ff$dadsp <- crosses$dadsp[match(ff$crossid, crosses$crossid)]
ff$crosstype <- crosses$crosstype[match(ff$crossid, crosses$crossid)]
ff$cross <- crosses$cross[match(ff$crossid, crosses$crossid)]
#rename crosstype codes
ff$crosstype <- factor(ff$crosstype, levels=c("between", "within", "hybrid"))
#made "between" the first reference level to facilitate comparison between outcrossing populations and hybridizing species
ff$mompop <- sapply(ff$mompop, mapvalues, from = c("794","866","899","879","892","904","3587"), to = c("WK","WK","WK","879WKG","892WKG","904WPG","3587WP"))
ff$dadpop <- sapply(ff$dadpop, mapvalues, from = c("794","866","899","879","892","904","3587"), to = c("WK","WK","WK","879WKG","892WKG","904WPG","3587WP"))
#define interactions
ff <- within(ff, sxc <- interaction(species,crosstype))
ff <- within(ff, sxcxm <- interaction(species,crosstype,mompop,momid))
ff <- within(ff, mompid <- as.factor(paste(mompop,momid,sep=".")))
ff <- within(ff, dadpid <- as.factor(paste(dadpop,dadid,sep=".")))
ff <- within(ff, smompop <- as.factor(paste(species,mompop,sep="")))
#check final structure
str(ff)
'data.frame': 1484 obs. of 39 variables:
$ index : int 1 2 3 4 5 6 7 8 9 10 ...
$ crossid : int 1 1 1 1 1 1 1 1 1 1 ...
$ plantid : Factor w/ 32 levels "0","1","10","11",..: 2 15 26 27 28 29 30 31 32 3 ...
$ crossid.plantid : Factor w/ 1618 levels "100-1","100-2",..: 98 154 200 220 270 310 372 423 544 99 ...
$ death.date : Factor w/ 133 levels "","100-2: 5/19/16",..: 1 1 1 1 1 23 1 1 37 1 ...
$ firstflower.day : Factor w/ 70 levels "","10/11","10/13",..: 3 23 1 26 1 1 1 1 1 1 ...
$ firstflower.date : Date, format: "2016-10-13" "2016-11-27" ...
$ use.alive.flowered : Factor w/ 3 levels "?","no","yes": 3 3 3 3 3 3 3 3 3 3 ...
$ alive : logi TRUE TRUE TRUE TRUE TRUE FALSE ...
$ use.firstflower : Factor w/ 4 levels "missed","never flowered",..: 4 4 1 4 1 3 3 1 3 1 ...
$ flowered : Factor w/ 3 levels "?","no","yes": 3 3 3 3 3 2 3 3 2 3 ...
$ saved.1 : Factor w/ 2 levels "no","yes": 1 1 2 1 2 1 1 1 1 1 ...
$ saved.2 : Factor w/ 2 levels "no","yes": 1 1 2 1 2 1 1 1 1 1 ...
$ sampled.VOC : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
$ biomass.inflo : Factor w/ 2 levels "no","yes": 2 2 1 2 1 1 2 2 1 2 ...
$ biomass.firstinflo : Factor w/ 2 levels "no","yes": 2 2 2 2 2 1 2 2 1 1 ...
$ use.fib : Factor w/ 4 levels "double","#N/A",..: 4 4 4 4 4 2 4 1 2 2 ...
$ delay : Factor w/ 35 levels "","0","1","10",..: 25 30 1 30 1 1 1 1 1 1 ...
$ firstinflo.collect.date: Factor w/ 70 levels "2016-07-12","2016-07-21",..: 46 63 20 55 45 70 13 50 70 70 ...
$ firstinflo.weigh.date : Factor w/ 48 levels "2017-07-26","2017-07-27",..: 17 20 15 20 17 48 9 8 48 48 ...
$ firstinflo.biomass.mg : Factor w/ 1093 levels "100","100.1",..: 508 629 1002 461 562 1093 55 892 1093 1093 ...
$ comments.fib : Factor w/ 31 levels "","10/13/17?",..: 1 1 1 1 1 24 1 31 24 24 ...
$ biomass.veg : logi NA NA NA NA NA NA ...
$ comments.SS : Factor w/ 44 levels "","?","\"1 terminlal infl\"",..: 1 1 30 1 30 13 35 30 13 30 ...
$ comments.JP.SGW : Factor w/ 78 levels "","105-9: 7/12/16 spray",..: 1 1 1 1 1 1 1 1 1 1 ...
$ firstflower : int 226 271 NA 250 NA NA NA NA NA NA ...
$ mompop : Factor w/ 5 levels "3587WP","WK",..: 3 3 3 3 3 3 3 3 3 3 ...
$ momid : Factor w/ 17 levels "1","10","10-1",..: 7 7 7 7 7 7 7 7 7 7 ...
$ species : Factor w/ 2 levels "hook","kaal": 1 1 1 1 1 1 1 1 1 1 ...
$ dadpop : Factor w/ 5 levels "3587WP","WK",..: 3 3 3 3 3 3 3 3 3 3 ...
$ dadid : Factor w/ 23 levels "1","10","10-1",..: 19 19 19 19 19 19 19 19 19 19 ...
$ dadsp : Factor w/ 2 levels "hook","kaal": 1 1 1 1 1 1 1 1 1 1 ...
$ crosstype : Factor w/ 3 levels "between","within",..: 2 2 2 2 2 2 2 2 2 2 ...
$ cross : Factor w/ 4 levels "HH","HK","KH",..: 1 1 1 1 1 1 1 1 1 1 ...
$ sxc : Factor w/ 6 levels "hook.between",..: 3 3 3 3 3 3 3 3 3 3 ...
$ sxcxm : Factor w/ 510 levels "hook.between.3587WP.1",..: 195 195 195 195 195 195 195 195 195 195 ...
$ mompid : Factor w/ 23 levels "3587WP.10","3587WP.14",..: 8 8 8 8 8 8 8 8 8 8 ...
$ dadpid : Factor w/ 26 levels "3587WP.10","3587WP.14",..: 10 10 10 10 10 10 10 10 10 10 ...
$ smompop : Factor w/ 5 levels "hook879WKG","hookWK",..: 1 1 1 1 1 1 1 1 1 1 ...
The sample sizes are unbalanced at all levels, including maternal population:
reptab <- with(ff, table(smompop,crosstype))
mosaic(reptab, pop=F)
labeling_cells(text = reptab, margin = 0)(reptab)
Replication is low for some within-population crosses. The replication is even lower for each maternal plant, so we need to be wary of estimates when subsetting at this level:
with(ff, kable(table(mompid,crosstype)))
| between | within | hybrid | |
|---|---|---|---|
| 3587WP.10 | 0 | 2 | 0 |
| 3587WP.14 | 18 | 0 | 9 |
| 3587WP.15 | 8 | 4 | 0 |
| 3587WP.7 | 25 | 17 | 20 |
| 3587WP.A | 5 | 3 | 3 |
| 3587WP.C | 16 | 1 | 0 |
| 879WKG.10-1 | 61 | 8 | 45 |
| 879WKG.2-2 | 40 | 57 | 114 |
| 879WKG.G-2 | 20 | 11 | 39 |
| 879WKG.H-2 | 0 | 1 | 8 |
| 879WKG.N-5 | 11 | 11 | 29 |
| 892WKG.1 | 30 | 6 | 10 |
| 892WKG.10 | 1 | 0 | 0 |
| 892WKG.2 | 3 | 0 | 0 |
| 892WKG.3 | 4 | 1 | 4 |
| 892WKG.4 | 1 | 0 | 1 |
| 892WKG.5 | 13 | 7 | 6 |
| 904WPG.2 | 17 | 11 | 19 |
| 904WPG.3 | 22 | 34 | 5 |
| 904WPG.5 | 82 | 38 | 32 |
| WK.2 | 201 | 20 | 137 |
| WK.2E- 1 | 9 | 0 | 40 |
| WK.4 | 83 | 19 | 42 |
To identify the best-fitting distribution, we make quantile-quantile plots of the raw data against various distributions. The more points within the confidence interval envelopes, the better the fit. Later, we present quantile-quantile plots of the model residuals to assess model fit.
Effects and interactions in these plots are simply given by the mean, which may be unduly influenced by high values.
intplot <- ggplot(ff,aes(fill=factor(alive))) + geom_bar(position="fill") + labs(y="Proportion") + guides(fill=guide_legend(title="Survived")) + facet_grid(~species)
intplot + aes(x=crosstype)
intplot + aes(x=mompop)
intplot + aes(x=mompid)
intplot + aes(x=dadpop)
We constructed the following models with the package glmmADMB. They all have the same fixed effects, species x crosstype, and response variable, alive
| distribution, Random Effects: | None | Maternal plant | Maternal population |
|---|---|---|---|
| normal (norm) | X | X | X |
sc.bin <- glm(alive~species*crosstype, data=ff, family="binomial")
sc.mix.mompid.bin <- glmer(alive~species*crosstype + (1|mompid), data=ff, family="binomial")
sc.mix.mompop.bin <- glmer(alive~species*crosstype + (1|mompop), data=ff, family="binomial")
sc.mix.momdadpid.bin <- glmer(alive~species*crosstype + (1|mompid) + (1|dadpid), data=ff, family="binomial")
We will use the Aikake Information Criterion to pick the model the best fits the data, penalized by the number of parameters. Differences of 2 units are significant.
#AICtab(sc.b, sc.mix.mompop.b, sc.mix.mompid.b)
sc.names <- c("sc.bin", "sc.mix.mompid.bin", "sc.mix.mompop.bin","sc.mix.momdadpid.bin")
sc.list <- sapply(sc.names, get, USE.NAMES=T)
sc.AIC <- ICtab(sc.list,mnames=sc.names,type="AIC", base=T, delta=F) # for AICc, nobs=nobs(sc.list[[1]])
class(sc.AIC)<-"data.frame"
all.names <- c(sc.names)
all.list <- sapply(all.names, get, USE.NAMES=T)
all.AIC <- dfun(rbind(sc.AIC))
all.AIC <- all.AIC[order(all.AIC$dAIC),]
kable(all.AIC, format.arg=list(digits=3))
| dAIC | df | |
|---|---|---|
| sc.mix.mompop.bin | 0.00 | 7 |
| sc.bin | 1.17 | 6 |
| sc.mix.momdadpid.bin | 2.41 | 8 |
| sc.mix.mompid.bin | 2.57 | 7 |
The best-fiting model is a model with the following components:
The coefficients estimated for each model agree qualitatively.
sc.log.names <- sc.names
sc.log <- sapply(sc.log.names, get, USE.NAMES=T)
coefplot2(sc.log, legend.x="topright",legend=T,legend.args=list(cex=0.8, xpd=T, inset=c(-0.1,0)), col.pts=sample(gg_color_hue(length(sc.log.names))), spacing=0.05, lwd.2=2, lwd.1=4, intercept=T)
We chose the model with nearly the best (lowest) AIC, to carry out inference tests and parameter estimation.
mod <- sc.mix.momdadpid.bin
print(mod)
Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: binomial ( logit )
Formula: alive ~ species * crosstype + (1 | mompid) + (1 | dadpid)
Data: ff
AIC BIC logLik deviance df.resid
793.8097 836.2297 -388.9049 777.8097 1476
Random effects:
Groups Name Std.Dev.
dadpid (Intercept) 0.3103
mompid (Intercept) 0.3381
Number of obs: 1484, groups: dadpid, 26; mompid, 23
Fixed Effects:
(Intercept) specieskaal
1.9896 2.4206
crosstypewithin crosstypehybrid
0.3945 0.7162
specieskaal:crosstypewithin specieskaal:crosstypehybrid
-2.1090 -3.3154
By dropping it from the model and performing a likelihood-ratio test, we see that the species x crosstype interaction is not significant.
sxc.chisq <- drop1(mod, test="Chisq") #load from file
dfun(sxc.chisq)
Single term deletions
Model:
alive ~ species * crosstype + (1 | mompid) + (1 | dadpid)
Df dAIC LRT Pr(Chi)
<none> 0.000
species:crosstype 2 16.761 20.761 3.104e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The model estimated the following parameters, with individual parameter significance determined by the Wald z-test, and fixed effect significance determined by analysis of deviance Wald test.
summary(mod)
Generalized linear mixed model fit by maximum likelihood (Laplace
Approximation) [glmerMod]
Family: binomial ( logit )
Formula: alive ~ species * crosstype + (1 | mompid) + (1 | dadpid)
Data: ff
AIC BIC logLik deviance df.resid
793.8 836.2 -388.9 777.8 1476
Scaled residuals:
Min 1Q Median 3Q Max
-8.5657 0.1921 0.2653 0.3359 0.4753
Random effects:
Groups Name Variance Std.Dev.
dadpid (Intercept) 0.09626 0.3103
mompid (Intercept) 0.11429 0.3381
Number of obs: 1484, groups: dadpid, 26; mompid, 23
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.9896 0.2471 8.053 8.06e-16 ***
specieskaal 2.4206 0.6494 3.728 0.000193 ***
crosstypewithin 0.3945 0.3823 1.032 0.302147
crosstypehybrid 0.7162 0.2996 2.390 0.016836 *
specieskaal:crosstypewithin -2.1090 0.7878 -2.677 0.007427 **
specieskaal:crosstypehybrid -3.3154 0.7624 -4.348 1.37e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) spcskl crsstypw crsstyph spcskl:crsstypw
specieskaal -0.381
crsstypwthn -0.314 0.126
crsstyphybr -0.567 0.276 0.242
spcskl:crsstypw 0.153 -0.710 -0.477 -0.121
spcskl:crsstyph 0.289 -0.828 -0.086 -0.499 0.601
Anova(mod, type=3)
Analysis of Deviance Table (Type III Wald chisquare tests)
Response: alive
Chisq Df Pr(>Chisq)
(Intercept) 64.8545 1 8.064e-16 ***
species 13.8946 1 0.0001934 ***
crosstype 5.9311 2 0.0515333 .
species:crosstype 18.9142 2 7.813e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The least square means procedure can generate predictor estimates of each type, and give their significance groupings with a post-hoc Tukey test. S. hookeri-produced hybrids produce less Viability than either crosses between or within S. hookeri populations. The other differences are not significant, but remember that the fixed effect of hybrid (vs. between) was significant (model summary).
#Count
rg <- ref.grid(mod)
#summary(rg)
sxc.lsm <- lsmeans(rg, ~ crosstype*species)
plot(sxc.lsm)
cld.mod <- cld(sxc.lsm, Letters=letters) #tukey letterings
library(boot)
cld.mod$response <- inv.logit(cld.mod$lsmean)
cld.mod$uSE <- inv.logit(cld.mod$lsmean+cld.mod$SE)
cld.mod$lSE <- inv.logit(cld.mod$lsmean-cld.mod$SE)
options(digits=4)
cld.mod[rev(order(cld.mod$species, cld.mod$crosstype)),]
crosstype species lsmean SE df asymp.LCL asymp.UCL .group response
hybrid kaal 1.811 0.3412 NA 1.142 2.480 a 0.8595
within kaal 2.696 0.4202 NA 1.872 3.519 ab 0.9368
between kaal 4.410 0.6004 NA 3.233 5.587 b 0.9880
hybrid hook 2.706 0.2586 NA 2.199 3.213 ab 0.9374
within hook 2.384 0.3845 NA 1.630 3.138 a 0.9156
between hook 1.990 0.2471 NA 1.505 2.474 a 0.8797
uSE lSE
0.8959 0.8130
0.9575 0.9068
0.9934 0.9783
0.9509 0.9204
0.9410 0.8808
0.9035 0.8510
Results are given on the logit (not the response) scale.
Confidence level used: 0.95
Results are given on the log odds ratio (not the response) scale.
P value adjustment: tukey method for comparing a family of 6 estimates
significance level used: alpha = 0.05
H.wb <- with(cld.mod[cld.mod$species=="hook",], response[crosstype=="within"]/response[crosstype=="between"] - 1)
K.wb <- with(cld.mod[cld.mod$species=="kaal",], response[crosstype=="within"]/response[crosstype=="between"] - 1)
K.H <- with(cld.mod[cld.mod$crosstype=="between",], response[species=="kaal"]/response[species=="hook"] - 1)
maxsp <- ifelse(K.H>0, "kaal","hook")
minsp <- ifelse(K.H<0, "kaal","hook")
maxresp <- with(cld.mod, response[species==maxsp & crosstype=="between"])
minresp <- with(cld.mod, response[species==minsp & crosstype=="between"])
HK.resp <- with(cld.mod, response[species=="hook" & crosstype=="hybrid"])
KH.resp <- with(cld.mod, response[species=="kaal" & crosstype=="hybrid"])
HK.int <- with(cld.mod, ifelse(HK.resp > minresp & HK.resp < maxresp, (HK.resp-minresp)/(maxresp-minresp),
ifelse(HK.resp < minresp, HK.resp/minresp-1, HK.resp/maxresp-1)))
KH.int <- with(cld.mod, ifelse(KH.resp > minresp & KH.resp < maxresp, (KH.resp-minresp)/(maxresp-minresp),
ifelse(KH.resp < minresp, KH.resp/minresp-1, KH.resp/maxresp-1)))
intermed <- (minresp + maxresp) / 2
round(c(H.wb,K.wb,K.H,HK.int,KH.int),2)
[1] 0.04 -0.05 0.12 0.53 -0.02
ggplot(as.data.frame(cld.mod), aes(y=response, x=relevel(crosstype, "within"), fill=species)) +
geom_col(position=position_dodge2()) +
geom_linerange(aes(ymin=lSE, ymax=uSE), position=position_dodge(0.9)) +
labs(x="", y="Survival",fill="Maternal species") +
scale_fill_manual(labels = c("S. hookeri ", "S. kaalae "), values=brewer.pal(name="Set1", n=3)[c(3,2)]) +
scale_x_discrete(labels = c("Intrapopulation", "Interpopulation", "Hybrid")) +
geom_text(aes(label=.group), position=position_dodge(0.9), hjust=0, vjust=-1) +
scale_y_continuous(expand = expand_scale(mult=c(0,0.05)), breaks = scales::pretty_breaks(n = 5), limits=c(0,1)) +
theme_classic() + theme(legend.text=element_text(face="italic", size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + geom_segment(aes(x=2.5, y=intermed, xend=3.5, yend=intermed))