Modified from hybridseeds.Rmd (April 2017)

Guides

Purpose

Identify reproductive barriers between two sympatric moth-pollinated plant species, Schiedea kaalae and S. hookeri by fitting a generalized linear mixed model (GLMM).

In the experimental design, the following crosstypes were made:

  • within species, between population (may show outbreeding depression or heterosis)
  • within species, within populations (may show inbreeding depression)
  • hybrids between species (indicates species barrier from pollination to seed production)

In this analysis the response variable is the date of first flower (if the plant flowered and was scored) of each cross. Other barriers (hybrid survival, flowering) could be analyzed in a similar framework, with appropriate changes to the underlying distribution.

Fixed effects:

  • crosstype - hybrids, within population, between populations
  • species - species of the maternal plant that produced the Inflo Biofirstflower

Potential random effects:

  • mompop - maternal plant population
  • mompid - maternal plant, specified by its population and ID
  • dadpop - paternal plant population

Data Import

ff <- read.table("firstflower6.csv", header=T, sep="\t")
ff$firstflower.date[ff$firstflower.date==""] <- NA
ff$firstflower.date[ff$use.firstflower!="yes"] <- NA
ff <- ff[!is.na(ff$firstflower.date),]
ff$firstflower.date <- as.Date(ff$firstflower.date)
ff <- ff[ff$crossid!=107,] ##FIND OUT WHAT CROSS THIS IS
ff$alive[ff$use.alive.flowered!="yes"] <- NA
ff <- ff[!is.na(ff$alive),]


crosses <- read.table("hybrids.csv", header=T, sep="\t", colClasses=c(mompop="factor", dadpop="factor"))

crosscol <- c("green","blue","orange","red")

#treat populations as factors
ff$mompop <- crosses$mompop[match(ff$crossid, crosses$crossid)]
ff$momid <- crosses$momid[match(ff$crossid, crosses$crossid)]
ff$species <- crosses$momsp[match(ff$crossid, crosses$crossid)]
ff$dadpop <- crosses$dadpop[match(ff$crossid, crosses$crossid)]
ff$dadid <- crosses$dadid[match(ff$crossid, crosses$crossid)]
ff$dadsp <- crosses$dadsp[match(ff$crossid, crosses$crossid)]
ff$crosstype <- crosses$crosstype[match(ff$crossid, crosses$crossid)]
ff$cross <- crosses$cross[match(ff$crossid, crosses$crossid)]

#rename crosstype codes
ff$crosstype <- factor(ff$crosstype, levels=c("between", "within", "hybrid"))
#made "between" the first reference level to facilitate comparison between outcrossing populations and hybridizing species 

ff$mompop <- sapply(ff$mompop, mapvalues, from = c("794","866","899","879","892","904","3587"), to = c("WK","WK","WK","879WKG","892WKG","904WPG","3587WP"))
ff$dadpop <- sapply(ff$dadpop, mapvalues, from = c("794","866","899","879","892","904","3587"), to = c("WK","WK","WK","879WKG","892WKG","904WPG","3587WP"))

#define interactions
ff <- within(ff, sxc <- interaction(species,crosstype))
ff <- within(ff, sxcxm <- interaction(species,crosstype,mompop,momid))
ff <- within(ff, mompid <- as.factor(paste(mompop,momid,sep=".")))
ff <- within(ff, dadpid <- as.factor(paste(dadpop,dadid,sep=".")))
ff <- within(ff, smompop <- as.factor(paste(species,mompop,sep="")))

ff$firstflower <- as.integer(round(difftime(ff$firstflower.date, "2016-03-10")))
#check final structure
str(ff)
   'data.frame':    1084 obs. of  39 variables:
    $ index                  : int  1 2 4 12 13 15 17 24 25 26 ...
    $ crossid                : int  1 1 1 1 1 1 1 2 2 2 ...
    $ plantid                : Factor w/ 32 levels "0","1","10","11",..: 2 15 27 5 7 9 11 2 15 26 ...
    $ crossid.plantid        : Factor w/ 1618 levels "100-1","100-2",..: 98 154 220 104 107 113 123 650 718 752 ...
    $ death.date             : Factor w/ 133 levels "","100-2: 5/19/16",..: 1 1 1 1 1 1 1 1 1 1 ...
    $ firstflower.day        : Factor w/ 70 levels "","10/11","10/13",..: 3 23 26 43 36 21 22 56 40 27 ...
    $ firstflower.date       : Date, format: "2016-10-13" "2016-11-27" ...
    $ use.alive.flowered     : Factor w/ 3 levels "?","no","yes": 3 3 3 3 3 3 3 3 3 3 ...
    $ alive                  : Factor w/ 3 levels "?","no","yes": 3 3 3 3 3 3 3 3 3 3 ...
    $ use.firstflower        : Factor w/ 4 levels "missed","never flowered",..: 4 4 4 4 4 4 4 4 4 4 ...
    $ flowered               : Factor w/ 3 levels "?","no","yes": 3 3 3 3 3 3 3 3 3 3 ...
    $ saved.1                : Factor w/ 2 levels "no","yes": 1 1 1 1 2 1 1 1 1 1 ...
    $ saved.2                : Factor w/ 2 levels "no","yes": 1 1 1 1 2 2 1 1 1 1 ...
    $ sampled.VOC            : Factor w/ 2 levels "no","yes": 1 1 1 1 2 2 1 1 1 1 ...
    $ biomass.inflo          : Factor w/ 2 levels "no","yes": 2 2 2 2 1 1 2 1 2 2 ...
    $ biomass.firstinflo     : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 1 ...
    $ use.fib                : Factor w/ 4 levels "double","#N/A",..: 4 4 4 4 4 4 4 4 1 2 ...
    $ delay                  : Factor w/ 35 levels "","0","1","10",..: 25 30 30 14 27 18 30 25 23 35 ...
    $ firstinflo.collect.date: Factor w/ 70 levels "2016-07-12","2016-07-21",..: 46 63 55 17 7 60 62 15 10 70 ...
    $ firstinflo.weigh.date  : Factor w/ 48 levels "2017-07-26","2017-07-27",..: 17 20 20 9 20 1 20 7 46 48 ...
    $ firstinflo.biomass.mg  : Factor w/ 1093 levels "100","100.1",..: 508 629 461 266 65 666 790 1 4 1093 ...
    $ comments.fib           : Factor w/ 31 levels "","10/13/17?",..: 1 1 1 1 1 1 1 1 10 24 ...
    $ biomass.veg            : logi  NA NA NA NA NA NA ...
    $ comments.SS            : Factor w/ 44 levels "","?","\"1 terminlal infl\"",..: 1 1 1 1 1 1 1 1 1 1 ...
    $ comments.JP.SGW        : Factor w/ 78 levels "","105-9: 7/12/16 spray",..: 1 1 1 1 1 1 1 1 1 1 ...
    $ mompop                 : Factor w/ 5 levels "3587WP","WK",..: 3 3 3 3 3 3 3 3 3 3 ...
    $ momid                  : Factor w/ 17 levels "1","10","10-1",..: 7 7 7 7 7 7 7 7 7 7 ...
    $ species                : Factor w/ 2 levels "hook","kaal": 1 1 1 1 1 1 1 1 1 1 ...
    $ dadpop                 : Factor w/ 5 levels "3587WP","WK",..: 3 3 3 3 3 3 3 3 3 3 ...
    $ dadid                  : Factor w/ 23 levels "1","10","10-1",..: 19 19 19 19 19 19 19 3 3 3 ...
    $ dadsp                  : Factor w/ 2 levels "hook","kaal": 1 1 1 1 1 1 1 1 1 1 ...
    $ crosstype              : Factor w/ 3 levels "between","within",..: 2 2 2 2 2 2 2 2 2 2 ...
    $ cross                  : Factor w/ 4 levels "HH","HK","KH",..: 1 1 1 1 1 1 1 1 1 1 ...
    $ sxc                    : Factor w/ 6 levels "hook.between",..: 3 3 3 3 3 3 3 3 3 3 ...
    $ sxcxm                  : Factor w/ 510 levels "hook.between.3587WP.1",..: 195 195 195 195 195 195 195 195 195 195 ...
    $ mompid                 : Factor w/ 23 levels "3587WP.10","3587WP.14",..: 8 8 8 8 8 8 8 8 8 8 ...
    $ dadpid                 : Factor w/ 24 levels "3587WP.10","3587WP.14",..: 9 9 9 9 9 9 9 7 7 7 ...
    $ smompop                : Factor w/ 5 levels "hook879WKG","hookWK",..: 1 1 1 1 1 1 1 1 1 1 ...
    $ firstflower            : int  217 262 241 159 138 257 260 152 144 243 ...

Data Inspection

Replication

The sample sizes are unbalanced at all levels, including maternal population:

reptab <- with(ff, table(smompop,crosstype))
mosaic(reptab, pop=F)
labeling_cells(text = reptab, margin = 0)(reptab)

Replication is low for some within-population crosses. The replication is even lower for each maternal plant, so we need to be wary of estimates when subsetting at this level:

with(ff, kable(table(mompid,crosstype)))
between within hybrid
3587WP.10 0 1 0
3587WP.14 15 0 6
3587WP.15 5 3 0
3587WP.7 19 14 12
3587WP.A 5 1 0
3587WP.C 16 1 0
879WKG.10-1 42 5 28
879WKG.2-2 31 34 89
879WKG.G-2 14 9 26
879WKG.H-2 0 0 4
879WKG.N-5 7 7 17
892WKG.1 24 5 7
892WKG.10 1 0 0
892WKG.2 2 0 0
892WKG.3 3 1 2
892WKG.4 1 0 0
892WKG.5 13 7 5
904WPG.2 15 9 15
904WPG.3 22 30 2
904WPG.5 75 33 19
WK.2 137 19 105
WK.2E- 1 6 0 27
WK.4 54 8 26

Overall data distribution

To identify the best-fitting distribution, we make quantile-quantile plots of the raw data against various distributions. The more points within the confidence interval envelopes, the better the fit. Later, we present quantile-quantile plots of the model residuals to assess model fit.

#QQ plots against various distributions
set.seed(1)
par(mfrow=c(1,3))
normal <- fitdistr(log(ff$firstflower+1), "normal")
qqp(log(ff$firstflower+1), "norm", main="Normal")
lognormal <- fitdistr(ff$firstflower+1, "lognormal")
qqp(ff$firstflower+1, "lnorm", main="Log Normal")
#pois <- fitdistr(ff$firstflower+1, "Poisson")
#qqp(ff$firstflower, "pois", pois$estimate, main="Poisson")
#nbinom <- fitdistr(ff$firstflower+1, "Negative Binomial") 
#qqp(ff$firstflower+1, "nbinom", size = nbinom$estimate[[1]], mu=nbinom$estimate[[2]], main="Negative Binomial")
gamma <- fitdistr(ff$firstflower+1, "gamma")
qqp(ff$firstflower+1, "gamma", shape = gamma$estimate[[1]], rate = gamma$estimate[[2]], main="Gamma")

distributions by fixed factors

ggplot(ff,aes(x=firstflower,fill=species)) +
  geom_histogram(data=subset(ff,species == "hook"), aes(y=-..density..),binwidth=10)+
  geom_histogram(data=subset(ff,species == "kaal"), aes(y= ..density..),binwidth=10)+
    coord_flip() + facet_grid(~crosstype) + labs(y="Histogram", x="Days to Flower")

distributions by random factors

ggplot(aes(y=firstflower, x=mompid, color=crosstype), data=ff) + geom_count(alpha=0.8) + coord_flip() + labs(x="Maternal plant", y="Days to Flower")

Fixed effects

Effects and interactions in these plots are simply given by the mean, which may be unduly influenced by high values.

intplot <- ggplot(ff,aes(x=crosstype,y=firstflower))+
  geom_count(aes(size = ..prop.., group=sxc),alpha=0.5)+
  stat_summary(aes(x=as.numeric(crosstype)),fun.y=mean,geom="line")+ facet_grid(~species)
intplot + aes(group=species, color=species)

Random effects

Maternal population

intplot + aes(group=mompop, color=mompop)

Maternal plant

intplot + aes(group=mompid, color=mompop)

Paternal population

intplot + aes(group=dadpop, color=dadpop)

Paternal plant

intplot + aes(group=dadpid, color=dadpop)

Run models on subsets

Run many generalized linear models on subsets of the data defined by crosstype | mompid to see if effects estimates are consistent within maternal plants.

Most maternal plant subsets agree, but some are problematic outliers. These plants can be picked out visually from the random effects interaction plot above, the estimated parameters of each subset model, and the QQ plot of the estimated parameters:

 #had to get rid of species or mompid since mompid is nested inside species. dadpop also works
glm.lis <- lmList(log(firstflower)~crosstype|mompid,data=ff, family="gaussian")
plot.lmList(glm.lis,scale=list(x=list(relation="free")))
   Loading required package: reshape
   
   Attaching package: 'reshape'
   The following objects are masked from 'package:plyr':
   
       rename, round_any
   The following object is masked from 'package:Matrix':
   
       expand
   Using grp as id variables

qqmath.lmList(glm.lis)#
   Using  as id variables

Models

We constructed the following models with the package glmmADMB. They all have the same fixed effects, species x crosstype, and response variable, log10(firstflower)

  • X = standard GL(M)M
distribution, Random Effects: None Maternal plant Maternal population
normal (norm) X X X
#Normal (Gaussian) distribution, identity link
sc.norm          <- lm(firstflower~species*crosstype, data=ff)
sc.mix.mompid    <- lmer(firstflower~species*crosstype + (1|mompid), data=ff)
sc.mix.mompop    <- lmer(firstflower~species*crosstype + (1|mompop), data=ff)
sc.mix.momdadpid <- lmer(firstflower~species*crosstype + (1|mompid) + (1|dadpid), data=ff)
sc.mix.momdadpop <- lmer(firstflower~species*crosstype + (1|mompop) + (1|dadpop), data=ff)

Model comparison

AIC

We will use the Aikake Information Criterion to pick the model the best fits the data, penalized by the number of parameters. Differences of 2 units are significant.

sc.names <- c("sc.norm","sc.mix.mompid","sc.mix.mompop","sc.mix.momdadpid","sc.mix.momdadpop")
sc.list <- sapply(sc.names, get, USE.NAMES=T)
sc.AIC <- ICtab(sc.list,mnames=sc.names,type="AIC", base=T, delta=F) # for AICc, nobs=nobs(sc.list[[1]])
class(sc.AIC)<-"data.frame"
all.names <- c(sc.names)
all.list <- sapply(all.names, get, USE.NAMES=T)
all.AIC <- dfun(rbind(sc.AIC))
all.AIC <- all.AIC[order(all.AIC$dAIC),]
kable(all.AIC, format.arg=list(digits=3))
dAIC df
sc.mix.momdadpid 0.00 9
sc.mix.momdadpop 4.79 9
sc.mix.mompop 9.87 8
sc.mix.mompid 10.10 8
sc.norm 38.01 7

The best-fiting model is a mixed model with the following components:

    • response: firstflower
    • fixed effects: species, crosstype, species x crosstype
    • random effect: mompid dadpid

Overdispersion

Looking at the normal, fixed effects model, we see that the residuals are not normal:

shapiro.test(sc.norm$residuals)#raw residuals!
   
    Shapiro-Wilk normality test
   
   data:  sc.norm$residuals
   W = 0.92377, p-value < 2.2e-16

Coefficients

The coefficients estimated for each model agree qualitatively.

sc <- sapply(sc.names, get, USE.NAMES=T)

coefplot2(sc, legend.x="topright",legend=T,legend.args=list(cex=0.8, xpd=T, inset=c(-0.1,0)), col.pts=sample(gg_color_hue(length(sc.names))), spacing=0.05, lwd.2=2, lwd.1=4, intercept=F)

Inference

We chose the model with nearly the best (lowest) AIC, to carry out inference tests and parameter estimation.

Description

mod <- sc.mix.momdadpid
print(mod)
   Linear mixed model fit by REML ['lmerMod']
   Formula: firstflower ~ species * crosstype + (1 | mompid) + (1 | dadpid)
      Data: ff
   REML criterion at convergence: 10125.27
   Random effects:
    Groups   Name        Std.Dev.
    dadpid   (Intercept)  4.811  
    mompid   (Intercept)  3.616  
    Residual             25.816  
   Number of obs: 1084, groups:  dadpid, 24; mompid, 23
   Fixed Effects:
                   (Intercept)                  specieskaal  
                       168.119                       15.772  
               crosstypewithin              crosstypehybrid  
                         6.467                       18.236  
   specieskaal:crosstypewithin  specieskaal:crosstypehybrid  
                       -16.480                      -10.611

Test significance of random effects

Using a likelihood ratio test, with a null hypothesis of zero variance, the random effect (maternal plant) is significant for both model parts:

anova(sc.norm, sc.mix.mompid) #double this p-value. or simulate null by permuting data.

Test significance of interaction

By dropping it from the model and performing a likelihood-ratio test, we see that the species x crosstype interaction is significant for the count model but not the binary model:

sxc.chisq <- drop1(mod, test="Chisq") #load from file
dfun(sxc.chisq)
   Single term deletions
   
   Model:
   firstflower ~ species * crosstype + (1 | mompid) + (1 | dadpid)
                     Df   dAIC    LRT  Pr(Chi)   
   <none>               0.0000                   
   species:crosstype  2 9.1181 13.118 0.001417 **
   ---
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Model summary

The model estimated the following parameters, with individual parameter significance determined by the Wald z-test, and fixed effect significance determined by analysis of deviance Wald test.

summary(mod)
   Linear mixed model fit by REML ['lmerMod']
   Formula: firstflower ~ species * crosstype + (1 | mompid) + (1 | dadpid)
      Data: ff
   
   REML criterion at convergence: 10125.3
   
   Scaled residuals: 
       Min      1Q  Median      3Q     Max 
   -1.9387 -0.6797 -0.1938  0.4445  3.9211 
   
   Random effects:
    Groups   Name        Variance Std.Dev.
    dadpid   (Intercept)  23.14    4.811  
    mompid   (Intercept)  13.08    3.616  
    Residual             666.49   25.816  
   Number of obs: 1084, groups:  dadpid, 24; mompid, 23
   
   Fixed effects:
                               Estimate Std. Error t value
   (Intercept)                  168.119      2.870   58.58
   specieskaal                   15.772      3.897    4.05
   crosstypewithin                6.467      3.389    1.91
   crosstypehybrid               18.236      3.229    5.65
   specieskaal:crosstypewithin  -16.480      4.743   -3.47
   specieskaal:crosstypehybrid  -10.611      6.368   -1.67
   
   Correlation of Fixed Effects:
                   (Intr) spcskl crsstypw crsstyph spcskl:crsstypw
   specieskaal     -0.738                                         
   crsstypwthn     -0.303  0.223                                  
   crsstyphybr     -0.657  0.654  0.269                           
   spcskl:crsstypw  0.220 -0.330 -0.718   -0.196                  
   spcskl:crsstyph  0.510 -0.675 -0.138   -0.765    0.206
Anova(mod, type=3)
   Analysis of Deviance Table (Type III Wald chisquare tests)
   
   Response: firstflower
                        Chisq Df Pr(>Chisq)    
   (Intercept)       3431.199  1  < 2.2e-16 ***
   species             16.382  1  5.176e-05 ***
   crosstype           32.055  2  1.095e-07 ***
   species:crosstype   13.017  2   0.001491 ** 
   ---
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Predicted random effects

These are box and QQ (to check normality) plots of the estimated random effect of each maternal plant.

predre <- setNames(data.frame(RE=ranef(mod)$mompid,SD=ranef(mod, sd=T)$`1`),c("RE","SD"))
ggplot(predre, aes(x = rownames(predre),y=RE)) +
  geom_point(size = 2) + coord_flip()+
  geom_errorbar(aes(ymin = RE-SD, ymax = RE+SD)) + labs(x="Maternal plants", y="Predicted random effects")

#Count
reStack <- ldply(ranef(mod))
print( qqmath( ~`(Intercept)`|.id, data=reStack, scales=list(relation="free"),
                 prepanel = prepanel.qqmathline,
                 panel = function(x, ...) {
                   panel.qqmathline(x, ...)
                   panel.qqmath(x, ...)
                 },
                 layout=c(1,1)))

Least square means

The least square means procedure can generate predictor estimates of each type, and give their significance groupings with a post-hoc Tukey test. S. hookeri-produced hybrids produce less Inflo Biofirstflower than either crosses between or within S. hookeri populations. The other differences are not significant, but remember that the fixed effect of hybrid (vs. between) was significant (model summary).

#Count
rg <- ref.grid(sc.mix.momdadpid)
   Loading required namespace: lmerTest
#summary(rg)
sxc.lsm <- lsmeans(rg, ~ crosstype*species)
plot(sxc.lsm)

options(digits=4)
cld.mod <- cld(sxc.lsm, Letters=letters) #tukey letterings
cld.mod$response <- cld.mod$lsmean
cld.mod$uSE <- cld.mod$lsmean+cld.mod$SE
cld.mod$lSE <- cld.mod$lsmean-cld.mod$SE
cld.mod[rev(order(cld.mod$species, cld.mod$crosstype)),]
    crosstype species lsmean    SE    df lower.CL upper.CL .group response
    hybrid    kaal     191.5 3.982 91.62    183.6    199.4    c      191.5
    within    kaal     173.9 3.408 71.69    167.1    180.7  a        173.9
    between   kaal     183.9 2.630 40.77    178.6    189.2   bc      183.9
    hybrid    hook     186.4 2.547 26.56    181.1    191.6   bc      186.4
    within    hook     174.6 3.720 72.22    167.2    182.0  ab       174.6
    between   hook     168.1 2.870 24.66    162.2    174.0  a        168.1
      uSE   lSE
    195.5 187.5
    177.3 170.5
    186.5 181.3
    188.9 183.8
    178.3 170.9
    171.0 165.2
   
   Degrees-of-freedom method: satterthwaite 
   Confidence level used: 0.95 
   P value adjustment: tukey method for comparing a family of 6 estimates 
   significance level used: alpha = 0.05
H.wb <-  with(cld.mod[cld.mod$species=="hook",], response[crosstype=="within"]/response[crosstype=="between"] - 1)
K.wb <- with(cld.mod[cld.mod$species=="kaal",], response[crosstype=="within"]/response[crosstype=="between"] - 1)
K.H <- with(cld.mod[cld.mod$crosstype=="between",], response[species=="kaal"]/response[species=="hook"] - 1)
maxsp <- ifelse(K.H>0, "kaal","hook")
minsp <- ifelse(K.H<0, "kaal","hook")
maxresp <- with(cld.mod, response[species==maxsp & crosstype=="between"])
minresp <- with(cld.mod, response[species==minsp & crosstype=="between"])
HK.resp <-  with(cld.mod, response[species=="hook" & crosstype=="hybrid"])
KH.resp <-  with(cld.mod, response[species=="kaal" & crosstype=="hybrid"])
HK.int <-   with(cld.mod, ifelse(HK.resp > minresp & HK.resp < maxresp, (HK.resp-minresp)/(maxresp-minresp), 
                                 ifelse(HK.resp < minresp, HK.resp/minresp-1, HK.resp/maxresp-1)))
KH.int <-   with(cld.mod, ifelse(KH.resp > minresp & KH.resp < maxresp, (KH.resp-minresp)/(maxresp-minresp), 
                                 ifelse(KH.resp < minresp, KH.resp/minresp-1, KH.resp/maxresp-1)))

intermed <- (minresp + maxresp) / 2
with(ff, wilcox.test(ff[species=="kaal" & crosstype=="hybrid","firstflower"], mu=intermed))
   
    Wilcoxon signed rank test with continuity correction
   
   data:  ff[species == "kaal" & crosstype == "hybrid", "firstflower"]
   V = 1800, p-value = 2e-04
   alternative hypothesis: true location is not equal to 176
with(ff, wilcox.test(ff[species=="hook" & crosstype=="hybrid","firstflower"], mu=intermed))
   
    Wilcoxon signed rank test with continuity correction
   
   data:  ff[species == "hook" & crosstype == "hybrid", "firstflower"]
   V = 32000, p-value = 3e-04
   alternative hypothesis: true location is not equal to 176
round(c(H.wb,K.wb,K.H,HK.int,KH.int),2)
   [1]  0.04 -0.05  0.09  0.01  0.04
ggplot(as.data.frame(cld.mod), aes(y=response, x=relevel(crosstype, "within"), fill=species)) +
  geom_col(position=position_dodge2()) +
  geom_linerange(aes(ymin=lSE, ymax=uSE), position=position_dodge(0.9)) +
  labs(x="", y="Days to flower",fill="Maternal species") +
  scale_fill_manual(labels = c("S. hookeri  ", "S. kaalae  "), values=brewer.pal(name="Set1", n=3)[c(3,2)]) +
  scale_x_discrete(labels = c("Intrapopulation", "Interpopulation", "Hybrid")) +
  geom_text(aes(label=.group), position=position_dodge(0.9), hjust=0, vjust=-1) +
  scale_y_continuous(expand = expand_scale(add=c(0,10)), breaks = scales::pretty_breaks(n = 5)) +
  theme_classic() + theme(legend.text=element_text(face="italic", size=rel(1)), legend.position="bottom", axis.text = element_text(colour="black", size=rel(1)), text=element_text(size=14), axis.ticks.x = element_blank()) + geom_segment(aes(x=2.5, y=intermed, xend=3.5, yend=intermed))