Does interfertility decrease with phylogenetic distance in Schiedea?

Load data

Data from Weller et al. 2001. F1 hybrids that made greater (green) or less than (red) 50 percent fertile pollen.

fert <- read.table(file="fertility.csv", sep="\t", header=T)
fert$sp1 <- toupper(fert$sp1)
fert$sp2 <- toupper(fert$sp2)
fert$g50 <- fert$g50+1
fspecies <- data.frame(name=levels(factor(c(fert$sp1,fert$sp2))))
g <- graph_from_data_frame(fert, directed=F, vertices=fspecies)
g_edges = get.edgelist(g)

Clustering

#clustering by network
adj <- get.adjacency(g, attr="g50")
adj[adj==2] <- 0.5
   Note: method with signature 'CsparseMatrix#Matrix#missing#replValue' chosen for function '[<-',
    target signature 'dgCMatrix#lgCMatrix#missing#numeric'.
    "Matrix#lsparseMatrix#missing#replValue" would also be valid
g.d <- graph_from_adjacency_matrix(adj, weighted=T, mode="undirected")
ceb <- cluster_edge_betweenness(g.d)

#trim phlyo
sch.f <- schtree
sch.f$tip.label <- toupper(paste("",substr(sch.f$tip.label,1,4),sep=""))
sch.f<-drop.tip(sch.f,sch.f$tip.label[-match(fspecies$name, sch$tip.label)])

#cophenetic plot
ceb_tree <- cophylo(as_phylo(ceb), sch.f)
   Rotating nodes to optimize matching...
   Done.
plot(ceb_tree)

Arcplot

#phylo-arcplot
par(mfrow=c(1,2))
par(mar=c(1.2,0,1.2,0))
plot(sch.f)
par(mar=c(0,0,0,0))
arcplot(g_edges, show.labels=F, horizontal=F, ordering=sch.f$tip.label, col.arcs=fert$g50+1)

Fertility vs. phylogenetic distance

adj <- as.matrix(get.adjacency(g, attr="g50"))
adj[upper.tri(adj, diag=T)]<-NA
adj[adj==0] <- NA
phydist <- cophenetic.phylo(sch.f)
phydist[upper.tri(phydist, diag=T)]<-NA
pf <- na.omit(data.frame(pd=c(phydist), ft=c(adj-1)))
cor(pf$ft, pf$pd, use="complete.obs")
   [1] -0.282946
cor.test(pf$ft, pf$pd, use="complete.obs")
   
    Pearson's product-moment correlation
   
   data:  pf$ft and pf$pd
   t = -2.1273, df = 52, p-value = 0.03816
   alternative hypothesis: true correlation is not equal to 0
   95 percent confidence interval:
    -0.51192243 -0.01643027
   sample estimates:
         cor 
   -0.282946

Logistic regression

#Logistic regression 
model <- glm(ft~pd, family=binomial(link='logit'), data=pf)
summary(model)
   
   Call:
   glm(formula = ft ~ pd, family = binomial(link = "logit"), data = pf)
   
   Deviance Residuals: 
       Min       1Q   Median       3Q      Max  
   -1.9650  -1.1879   0.7051   1.0703   1.1669  
   
   Coefficients:
               Estimate Std. Error z value Pr(>|z|)  
   (Intercept)    3.146      1.372   2.292   0.0219 *
   pd          -168.515     84.130  -2.003   0.0452 *
   ---
   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
   
   (Dispersion parameter for binomial family taken to be 1)
   
       Null deviance: 71.188  on 53  degrees of freedom
   Residual deviance: 66.574  on 52  degrees of freedom
   AIC: 70.574
   
   Number of Fisher Scoring iterations: 4
confint(model)
   Waiting for profiling to be done...
                      2.5 %    97.5 %
   (Intercept)    0.6772497   6.16800
   pd          -349.7680563 -14.05782
wald.test(b = coef(model), Sigma = vcov(model), Terms = 2)
   Wald test:
   ----------
   
   Chi-squared test:
   X2 = 4.0, df = 1, P(> X2) = 0.045

Logistic regression plot

plot <- ggplot(pf, aes(pd, ft)) + stat_smooth(method="glm", method.args=list(family="binomial"), se=T, color="black") + scale_y_continuous(breaks=c(0, 1), labels=c("<50%", ">50%")) + scale_x_continuous() + xlab("Phylogenetic distance") + ylab("Hybrid germination") +  scale_fill_gradientn(colors=c("white","black")) + theme_bw()

plot + geom_bin2d()