Identify reproductive barriers between two sympatric moth-pollinated plant species, Schiedea kaalae and S. hookeri by fitting a generalized linear mixed model (GLMM).
Fixed effects:
Potential random effects:
#DATA CHANGED
#changed from within to hybrid in hybridseeds.csv and f1&hybridseeds.csv: 615 597 hook 794 2 892 hybrid K HK 16
#need to fix dadpop = closed in hybridseeds.csv
sds <- read.table("f1&hybridseeds2.csv", header=T, sep="\t")
sds <- within(sds, yesno <- as.numeric(seednum>0))
sdsm2 <- sds[grepl("x",sds$momfullcross),]
sdsm2.split <- cbind(sdsm2$crossid, as.data.frame(matrix(unlist(matrix(lapply(matrix(unlist(strsplit(as.character(sdsm2$momfullcross), " x ", fixed=T)), nrow=2), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=4, byrow=T)))
colnames(sdsm2.split) <- c("crossid","mommompop","mommomid","momdadpop","momdadid")
sdsd2 <- sds[grepl("x",sds$dadfullcross),]
sdsd2.split <- cbind(sdsd2$crossid, as.data.frame(matrix(unlist(matrix(lapply(matrix(unlist(strsplit(as.character(sdsd2$dadfullcross), " x ", fixed=T)), nrow=2), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=4, byrow=T)))
colnames(sdsd2.split) <- c("crossid","dadmompop","dadmomid","daddadpop","daddadid")
sdsm1 <- sds[!grepl("x",sds$momfullcross),]
sdsm1.split <- cbind(sdsm1$crossid, as.data.frame(matrix(unlist(matrix(lapply(as.character(sdsm1$momfullcross), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=2, byrow=T)))
colnames(sdsm1.split) <- c("crossid","mompop","momid")
sdsd1 <- sds[!grepl("x",sds$dadfullcross),]
sdsd1.split <- cbind(sdsd1$crossid, as.data.frame(matrix(unlist(matrix(lapply(as.character(sdsd1$dadfullcross), function(x) { spl <- unlist(strsplit(unlist(strsplit(x, " ", fixed=T)), "-", fixed=T)); c(spl[1],gsub("-NA","",paste(spl[2],spl[3],spl[4], sep="-"))) }), ncol=2)), ncol=2, byrow=T)))
colnames(sdsd1.split) <- c("crossid","dadpop","dadid")
sds <- merge(sds, sdsm2.split, by="crossid", all.x=T)
sds <- merge(sds, sdsd2.split, by="crossid", all.x=T)
sds <- merge(sds, sdsm1.split, by="crossid", all.x=T)
sds <- merge(sds, sdsd1.split, by="crossid", all.x=T)
pop2sp <- function(x) { factor(ifelse(x %in% c("904","3587", "892"), "kaal", ifelse(is.na(x),NA,"hook")))}
sds$mommomspecies <- pop2sp(sds$mommompop)
sds$momdadspecies <- pop2sp(sds$momdadpop)
sds$dadmomspecies <- pop2sp(sds$dadmompop)
sds$daddadspecies <- pop2sp(sds$daddadpop)
sds$momcross <- factor(toupper(paste0(substr(sds$mommomspecies,0,1), substr(sds$momdadspecies,0,1))))
sds$dadcross <- factor(toupper(paste0(substr(sds$dadmomspecies,0,1), substr(sds$daddadspecies,0,1))))
sds$momcross <- factor(ifelse(sds$momcross=="NANA", toupper(substr(as.character(pop2sp(sds$mompop)),0,1)), as.character(sds$momcross)))
sds$dadcross <- factor(ifelse(sds$dadcross=="NANA", toupper(substr(as.character(pop2sp(sds$dadpop)),0,1)), as.character(sds$dadcross)))
is.na(sds$dadcross) <- is.na(sds$momcross) <- sds$crosstype=="control"
table(sds$crosstype)
control FF FH FK HF HHbetween HHwithin
250 298 300 248 148 65 47
HK KF KH KKbetween KKwithin
156 123 177 55 34
with(sds, plot(seednum~crosstype, las=2))
table(sds$momcross, sds$dadcross)
H HK K KH
H 112 61 156 105
HK 117 97 67 0
K 177 37 89 69
KH 191 1 177 195
table(sds$momcross, sds$dadcross, sds$crosstype) #errors in crosstype!
, , = control
H HK K KH
H 0 0 0 0
HK 0 0 0 0
K 0 0 0 0
KH 0 0 0 0
, , = FF
H HK K KH
H 0 0 0 0
HK 3 94 0 0
K 0 0 0 0
KH 3 1 2 195
, , = FH
H HK K KH
H 0 2 0 0
HK 105 2 2 0
K 0 0 0 1
KH 186 0 2 0
, , = FK
H HK K KH
H 0 0 0 0
HK 8 0 65 0
K 0 0 0 0
KH 2 0 173 0
, , = HF
H HK K KH
H 0 55 0 91
HK 1 1 0 0
K 0 0 0 0
KH 0 0 0 0
, , = HHbetween
H HK K KH
H 65 0 0 0
HK 0 0 0 0
K 0 0 0 0
KH 0 0 0 0
, , = HHwithin
H HK K KH
H 47 0 0 0
HK 0 0 0 0
K 0 0 0 0
KH 0 0 0 0
, , = HK
H HK K KH
H 0 0 156 0
HK 0 0 0 0
K 0 0 0 0
KH 0 0 0 0
, , = KF
H HK K KH
H 0 4 0 14
HK 0 0 0 0
K 0 37 0 68
KH 0 0 0 0
, , = KH
H HK K KH
H 0 0 0 0
HK 0 0 0 0
K 177 0 0 0
KH 0 0 0 0
, , = KKbetween
H HK K KH
H 0 0 0 0
HK 0 0 0 0
K 0 0 55 0
KH 0 0 0 0
, , = KKwithin
H HK K KH
H 0 0 0 0
HK 0 0 0 0
K 0 0 34 0
KH 0 0 0 0
sds <- sds[sds$crosstype!="control",]
sds <- sds[!(sds$momcross=="KH" & sds$dadcross=="HK"),]
#f1.c.mix.qpoi.zi <- glmmadmb(seednum~crosstype + (1|mompid), data=sds, family="nbinom1", zeroInflation=T)
#f1.c.mix.hr <- glmmadmb(yesno~crosstype + (1|mompid), data=sds, family="binom")
#f1.c.mix.qpoi.tr <- glmmadmb(seednum~crosstype + (1|mompid), data=subset(sds,seednum>0), family="truncnbinom1", admb.opts = admbControl(shess = FALSE, noinit = FALSE))
#f1.md.qpoi.zi <- glmmadmb(seednum~momcross*dadcross, data=sds, family="nbinom1", zeroInflation=T)
#f1.md.hr <- glmmadmb(yesno~momcross+dadcross, data=sds, family="binom")
#f1.md.qpoi.tr <- glmmadmb(seednum~momcross+dadcross, data=subset(sds,seednum>0), family="truncnbinom1", admb.opts = admbControl(shess = FALSE, noinit = FALSE))
f1.md.qpoi <- glm(seednum~momcross*dadcross, data=sds, family="quasipoisson")
f1.md.hr <- glm(yesno~momcross*dadcross, data=sds, family="binomial")
f1.md.qpoi.tr <- glm(seednum~momcross*dadcross, data=subset(sds,seednum>0), family="quasipoisson")
summary(f1.md.qpoi)
Call:
glm(formula = seednum ~ momcross * dadcross, family = "quasipoisson",
data = sds)
Deviance Residuals:
Min 1Q Median 3Q Max
-4.6599 -2.0000 -0.7834 0.9884 7.0583
Coefficients: (2 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.38482 0.06161 38.710 < 2e-16 ***
momcrossHK -1.68317 0.15281 -11.014 < 2e-16 ***
momcrossK -0.88515 0.09806 -9.027 < 2e-16 ***
momcrossKH -1.73720 0.12822 -13.549 < 2e-16 ***
dadcrossHK -0.76236 0.13687 -5.570 2.97e-08 ***
dadcrossK -0.83753 0.10046 -8.337 < 2e-16 ***
dadcrossKH -0.51964 0.10297 -5.046 5.00e-07 ***
momcrossHK:dadcrossHK 0.82353 0.24592 3.349 0.00083 ***
momcrossK:dadcrossHK -0.28779 0.32269 -0.892 0.37260
momcrossKH:dadcrossHK NA NA NA NA
momcrossHK:dadcrossK 0.60401 0.26978 2.239 0.02530 *
momcrossK:dadcrossK 1.07576 0.15822 6.799 1.47e-11 ***
momcrossKH:dadcrossK 0.88306 0.18914 4.669 3.28e-06 ***
momcrossHK:dadcrossKH NA NA NA NA
momcrossK:dadcrossKH -0.85743 0.27494 -3.119 0.00185 **
momcrossKH:dadcrossKH 0.91256 0.17779 5.133 3.20e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for quasipoisson family taken to be 4.615388)
Null deviance: 9896.3 on 1649 degrees of freedom
Residual deviance: 7609.0 on 1636 degrees of freedom
AIC: NA
Number of Fisher Scoring iterations: 6
summary(f1.md.hr)
Call:
glm(formula = yesno ~ momcross * dadcross, family = "binomial",
data = sds)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7793 -1.2112 0.7954 0.9431 1.5425
Coefficients: (2 not defined because of singularities)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.3531 0.2339 5.785 7.26e-09 ***
momcrossHK -1.0953 0.2991 -3.662 0.000250 ***
momcrossK -0.3645 0.2886 -1.263 0.206579
momcrossKH -1.4055 0.2751 -5.109 3.23e-07 ***
dadcrossHK -0.4823 0.3654 -1.320 0.186862
dadcrossK -0.7733 0.2874 -2.691 0.007119 **
dadcrossKH -0.5284 0.3157 -1.674 0.094164 .
momcrossHK:dadcrossHK 0.7963 0.4615 1.725 0.084458 .
momcrossK:dadcrossHK -0.2344 0.5218 -0.449 0.653300
momcrossKH:dadcrossHK NA NA NA NA
momcrossHK:dadcrossK 0.4856 0.4208 1.154 0.248426
momcrossK:dadcrossK 1.0231 0.4191 2.441 0.014642 *
momcrossKH:dadcrossK 0.9048 0.3552 2.547 0.010852 *
momcrossHK:dadcrossKH NA NA NA NA
momcrossK:dadcrossKH -1.2869 0.4435 -2.902 0.003713 **
momcrossKH:dadcrossKH 1.2739 0.3791 3.361 0.000778 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2193.4 on 1649 degrees of freedom
Residual deviance: 2095.7 on 1636 degrees of freedom
AIC: 2123.7
Number of Fisher Scoring iterations: 4
summary(f1.md.qpoi.tr)
Call:
glm(formula = seednum ~ momcross * dadcross, family = "quasipoisson",
data = subset(sds, seednum > 0))
Deviance Residuals:
Min 1Q Median 3Q Max
-4.4829 -1.4107 -0.4498 0.8008 5.5739
Coefficients: (2 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.61469 0.04728 55.303 < 2e-16 ***
momcrossHK -1.34051 0.11727 -11.431 < 2e-16 ***
momcrossK -0.79867 0.07525 -10.613 < 2e-16 ***
momcrossKH -1.24739 0.09840 -12.677 < 2e-16 ***
dadcrossHK -0.64254 0.10503 -6.118 1.36e-09 ***
dadcrossK -0.62271 0.07709 -8.077 1.88e-15 ***
dadcrossKH -0.38600 0.07902 -4.885 1.20e-06 ***
momcrossHK:dadcrossHK 0.57877 0.18872 3.067 0.002222 **
momcrossK:dadcrossHK -0.15755 0.24764 -0.636 0.524791
momcrossKH:dadcrossHK NA NA NA NA
momcrossHK:dadcrossK 0.52485 0.20704 2.535 0.011392 *
momcrossK:dadcrossK 0.79913 0.12142 6.582 7.48e-11 ***
momcrossKH:dadcrossK 0.60292 0.14515 4.154 3.55e-05 ***
momcrossHK:dadcrossKH NA NA NA NA
momcrossK:dadcrossKH -0.11783 0.21100 -0.558 0.576675
momcrossKH:dadcrossKH 0.46471 0.13644 3.406 0.000685 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for quasipoisson family taken to be 2.718112)
Null deviance: 3951.1 on 1020 degrees of freedom
Residual deviance: 2634.2 on 1007 degrees of freedom
AIC: NA
Number of Fisher Scoring iterations: 5
#add the count and binary models AIC to get the full hurdle model
AIC(f1.md.qpoi, f1.md.hr, f1.md.qpoi.tr)
df AIC
f1.md.qpoi 14 NA
f1.md.hr 14 2123.67
f1.md.qpoi.tr 14 NA
#Count
rg <- ref.grid(f1.md.qpoi)
#summary(rg)
c.lsm <- lsmeans(rg, ~ momcross*dadcross)
plot(c.lsm)
cld(c.lsm, Letters=letters) #tukey letterings
momcross dadcross lsmean SE df asymp.LCL asymp.UCL .group
K KH 0.1226023 0.24324985 NA -0.35415862 0.5993633 a
K HK 0.4495251 0.28209157 NA -0.10336422 1.0024144 ab
HK K 0.4681362 0.20768838 NA 0.06107447 0.8751980 ab
KH H 0.6476239 0.11244953 NA 0.42722690 0.8680210 ab
KH K 0.6931472 0.11418327 NA 0.46935209 0.9169423 ab
HK H 0.7016579 0.13984537 NA 0.42756598 0.9757498 ab
HK HK 0.7628271 0.14896095 NA 0.47086900 1.0547852 ab
KH KH 1.0405485 0.09143965 NA 0.86133006 1.2197669 b
K H 1.4996735 0.07628998 NA 1.35014787 1.6491991 c
H K 1.5472897 0.07935095 NA 1.39176469 1.7028147 c
H HK 1.6224674 0.12221510 NA 1.38293022 1.8620046 c
K K 1.7379003 0.09550560 NA 1.55071276 1.9250878 c
H KH 1.8651869 0.08250671 NA 1.70347676 2.0268971 c
H H 2.3848232 0.06160803 NA 2.26407367 2.5055727 d
KH HK NA NA NA NA NA
HK KH NA NA NA NA NA
Results are given on the log (not the response) scale.
Confidence level used: 0.95
P value adjustment: tukey method for comparing a family of 16 estimates
significance level used: alpha = 0.05
#Count
rg.tr <- ref.grid(f1.md.qpoi.tr)
#summary(rg)
c.lsm <- lsmeans(rg.tr, ~ momcross*dadcross)
plot(c.lsm)
cld.mod <- cld(c.lsm, Letters=letters) #tukey letterings
cld.mod$response <- exp(cld.mod$lsmean)
cld.mod$uSE <- exp(cld.mod$lsmean+cld.mod$SE)
cld.mod$lSE <- exp(cld.mod$lsmean-cld.mod$SE)
cld.mod
momcross dadcross lsmean SE df asymp.LCL asymp.UCL .group
K HK 1.015921 0.21648094 NA 0.5916257 1.440215 a
HK K 1.176321 0.15938292 NA 0.8639365 1.488706 a
HK HK 1.210404 0.11431468 NA 0.9863510 1.434456 a
HK H 1.274177 0.10731925 NA 1.0638352 1.484519 a
K KH 1.312186 0.18667504 NA 0.9463100 1.678063 ab
KH K 1.347508 0.08762581 NA 1.1757649 1.519252 a
KH H 1.367298 0.08629531 NA 1.1981622 1.536434 a
KH KH 1.446014 0.07017204 NA 1.3084789 1.583548 a
K H 1.816011 0.05854598 NA 1.7012628 1.930759 bc
H HK 1.972141 0.09378953 NA 1.7883171 2.155965 bcd
H K 1.991976 0.06089500 NA 1.8726235 2.111328 cd
K K 1.992430 0.07329231 NA 1.8487799 2.136080 bcd
H KH 2.228688 0.06331678 NA 2.1045892 2.352786 d
H H 2.614686 0.04727885 NA 2.5220209 2.707351 e
KH HK NA NA NA NA NA
HK KH NA NA NA NA NA
response uSE lSE
2.761905 3.429456 2.224294
3.242424 3.802673 2.764717
3.354839 3.761126 2.992440
3.575758 3.980854 3.211884
3.714286 4.476589 3.081792
3.847826 4.200208 3.525007
3.924731 4.278460 3.600247
4.246154 4.554818 3.958407
6.147287 6.517930 5.797721
7.186047 7.892640 6.542711
7.330000 7.790231 6.896958
7.333333 7.890997 6.815080
9.287671 9.894753 8.717836
13.662921 14.324402 13.031987
NA NA NA
NA NA NA
Results are given on the log (not the response) scale.
Confidence level used: 0.95
P value adjustment: tukey method for comparing a family of 16 estimates
significance level used: alpha = 0.05
ggplot(as.data.frame(cld.mod), aes(y=response, x=momcross, fill=dadcross)) +
geom_col(position=position_dodge2()) +
geom_linerange(aes(ymin=lSE, ymax=uSE), position=position_dodge2(0.9)) +
labs(x="Maternal plant", y="Seeds per capsule") +
geom_text(aes(label=trimws(.group), x=momcross, hjust=length(trimws(.group))/30), position=position_dodge2(0.9), vjust=-4) +
geom_text(aes(label=dadcross, y=0.5), position=position_dodge2(0.9)) +
scale_y_continuous(expand = expand_scale(mult=c(0,.1)), breaks = scales::pretty_breaks(n = 20))+
scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + guides(fill=F)
#Binary
rg.hr <- ref.grid(f1.md.hr)
c.lsm.hr <- lsmeans(rg.hr, ~ momcross*dadcross)
plot(c.lsm.hr)
cld.mod <- cld(c.lsm.hr, Letters=letters) #tukey letterings
library(boot)
cld.mod$response <- inv.logit(cld.mod$lsmean)
cld.mod$uSE <- inv.logit(cld.mod$lsmean+cld.mod$SE)
cld.mod$lSE <- inv.logit(cld.mod$lsmean-cld.mod$SE)
options(digits=4)
cld.mod
momcross dadcross lsmean SE df asymp.LCL asymp.UCL .group response
K KH -0.82668 0.2616 NA -1.3395 -0.3139 a 0.3043
KH H -0.05237 0.1448 NA -0.3361 0.2314 ab 0.4869
HK K -0.02985 0.2444 NA -0.5088 0.4491 abc 0.4925
KH K 0.07914 0.1504 NA -0.2157 0.3740 abc 0.5198
HK H 0.25783 0.1864 NA -0.1076 0.6232 abcd 0.5641
K HK 0.27193 0.3318 NA -0.3785 0.9223 abcde 0.5676
HK HK 0.57179 0.2114 NA 0.1574 0.9862 bcde 0.6392
H K 0.57982 0.1669 NA 0.2527 0.9069 bcde 0.6410
KH KH 0.69315 0.1519 NA 0.3954 0.9909 cde 0.6667
H KH 0.82472 0.2120 NA 0.4092 1.2403 bcde 0.6952
H HK 0.87083 0.2807 NA 0.3206 1.4211 bcde 0.7049
K H 0.98861 0.1691 NA 0.6572 1.3200 de 0.7288
K K 1.23837 0.2540 NA 0.7406 1.7361 de 0.7753
H H 1.35314 0.2339 NA 0.8947 1.8116 e 0.7946
KH HK NA NA NA NA NA NA
HK KH NA NA NA NA NA NA
uSE lSE
0.3624 0.2519
0.5231 0.4509
0.5534 0.4319
0.5571 0.4822
0.6093 0.5178
0.6465 0.4850
0.6864 0.5891
0.6785 0.6018
0.6995 0.6321
0.7382 0.6486
0.7598 0.6434
0.7609 0.6941
0.8164 0.7280
0.8302 0.7538
NA NA
NA NA
Results are given on the logit (not the response) scale.
Confidence level used: 0.95
Results are given on the log odds ratio (not the response) scale.
P value adjustment: tukey method for comparing a family of 16 estimates
significance level used: alpha = 0.05
#cld.mod[1,c("response","uSE","lSE",".group")] <- NA
ggplot(as.data.frame(cld.mod), aes(y=response, x=momcross, fill=dadcross)) +
geom_col(position=position_dodge2()) +
geom_linerange(aes(ymin=lSE, ymax=uSE), position=position_dodge2(0.9)) +
labs(x="Maternal plant", y="Seeds formation") +
geom_text(aes(label=trimws(.group), x=momcross, hjust=length(trimws(.group))/30), position=position_dodge2(0.9), vjust=-4) +
geom_text(aes(label=dadcross, y=0.05), position=position_dodge2(0.9)) +
scale_y_continuous(expand = expand_scale(mult=c(0,0)), breaks = scales::pretty_breaks(n = 5), limits=c(0,1)) +
scale_fill_manual(values=c("grey80","gray60","gray20","gray40"))+
theme_classic() + theme(legend.text=element_text(size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + guides(fill=F)