This document summarises the R analyses conducted for the following study:A Winteraceae pollen tetrad from the early Paleocene of western Greenland, and the fossil record of Winteraceae in Laurasia and Gondwana.

knitr::opts_chunk$set(echo = TRUE)
#install.packages("phytools",dependencies = T)
library(phytools)
## Loading required package: ape
## Loading required package: maps
version
##                _                           
## platform       x86_64-pc-linux-gnu         
## arch           x86_64                      
## os             linux-gnu                   
## system         x86_64, linux-gnu           
## status                                     
## major          3                           
## minor          4.1                         
## year           2017                        
## month          06                          
## day            30                          
## svn rev        72865                       
## language       R                           
## version.string R version 3.4.1 (2017-06-30)
## nickname       Single Candle

Read in the datasets and ladderize tree

#setwd("C:/Users/BlaBla/Dropbox/Friddis/Winteraceae_CharMap")
setwd("/Data/Dropbox/Winteraceae_CharMap/")
wtree <- read.tree("PySt_AllData_Rooted.tree")
morphQual <- read.csv("MorphQual.csv",row.names=1,stringsAsFactors = F,header=F)
morphQuant <- read.csv("MorphQuant.csv",row.names=1,header=F)

par(mfrow=c(1,2))
plot(wtree)
plot(ladderize(wtree))

wtree <- ladderize(wtree)

Plotting the continuous characters

Mapping continuous trait evolution on the molecular tree using contMap. Note: According to the phytools manual, anc.ML() should be able to deal with characters including missing data, nonetheless it still failed to run. As workaround, taxa with missing data are pruned from the tree for each character map.

Below are comparisons between the fastAnc(), anc.ML(), and anc.Bayes() functions. In case of our data, the results of fastAnc and anc.ML are congruent as expected (same principle, but fastAnc not speedier than anc.ML); anc.Bayes results may deviate and include unreasonable estimates/evolutionary scenarios.

theANCplot <- function(index) {
  x <- morphQuant[,index];names(x)<-rownames(morphQuant)
    if (sum(is.na(x))>0) {
      x.na<- names(x[is.na(x)])
      x<-x[!is.na(x)]
    cat("The following taxa are missing from the fastAnc and anc.Bayes analyses for continuous Character QT",index,"\n",x.na)
    } else { x.na <- NULL}
    par(mfrow=c(2,2),oma=c(2,0,2,0))
    fa <- fastAnc(drop.tip(wtree,x.na),x)
    contMap(drop.tip(wtree,x.na),x,method="user",anc.states=fa)
    mtext(paste("fastAnc: ",qtnames[index],sep=""),side=3,outer=F)
    # fml <- anc.ML(wtree,x,model="BM")
    # contMap(wtree,x,method="user",anc.states=fml$ace)
    contMap(wtree,x,method="anc.ML",model="BM",col="red")
    mtext(paste("anc.ML (BM): ",qtnames[index],sep=""),side=3,outer=F)
    B <- anc.Bayes(drop.tip(wtree,x.na),x,ngen=10000)
    #plot(B[,"logLik"])
    estimates<-colMeans(B[100:nrow(B),]) # Exclude burnin
    contMap(drop.tip(wtree,x.na),x,method="user",anc.states=estimates)
    mtext(paste("anc.Bayes: ",qtnames[index],sep=""),side=3,outer=F)
    
    par(mfrow=c(2,1),oma=c(2,2,2,0),mar=c(3,3,0,0))
    # Phenogram of fastAnc
    phenogram(drop.tip(wtree,x.na),c(x,fa))
    mtext(paste("fastAnc:",qtnames[index]),side=3,col="red")
    # Phenogram of anc.Bayes
    phenogram(drop.tip(wtree,x.na),c(x,estimates))
    mtext(paste("anc.Bayes",qtnames[index]),side=3,col="red")
    mtext("Phenograms",outer=T,side=3,adj=0)  
  }

for (i in 1:ncol(morphQuant)) theANCplot(i)
## Control parameters (set by user or default):
## List of 7
##  $ sig2   : num 33258
##  $ a      : num [1, 1] 53.4
##  $ y      : num [1:24] 53.4 53.4 53.4 53.4 53.4 ...
##  $ pr.mean: num [1:26] 1000 0 0 0 0 0 0 0 0 0 ...
##  $ pr.var : num [1:26] 1e+06 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 ...
##  $ prop   : num [1:26] 7.98 7.98 7.98 7.98 7.98 ...
##  $ sample : num 100
## Starting MCMC...
## Done MCMC.

## Control parameters (set by user or default):
## List of 7
##  $ sig2   : num 251275
##  $ a      : num [1, 1] 12.7
##  $ y      : num [1:24] 12.7 12.7 12.7 12.7 12.7 ...
##  $ pr.mean: num [1:26] 1000 0 0 0 0 0 0 0 0 0 ...
##  $ pr.var : num [1:26] 1e+06 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 ...
##  $ prop   : num [1:26] 60.3 60.3 60.3 60.3 60.3 ...
##  $ sample : num 100
## Starting MCMC...
## Done MCMC.

## The following taxa are missing from the fastAnc and anc.Bayes analyses for continuous Character QT 3 
##  Zygogynum_acsmithii Zygogynum_baillonii
## Control parameters (set by user or default):
## List of 7
##  $ sig2   : num 103989
##  $ a      : num [1, 1] 30
##  $ y      : num [1:22] 30 30 30 30 30 ...
##  $ pr.mean: num [1:24] 1000 0 0 0 0 0 0 0 0 0 ...
##  $ pr.var : num [1:24] 1e+06 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 ...
##  $ prop   : num [1:24] 25 25 25 25 25 ...
##  $ sample : num 100
## Starting MCMC...
## Done MCMC.

## The following taxa are missing from the fastAnc and anc.Bayes analyses for continuous Character QT 4 
##  Zygogynum_acsmithii Zygogynum_baillonii
## Control parameters (set by user or default):
## List of 7
##  $ sig2   : num 124624
##  $ a      : num [1, 1] 9.6
##  $ y      : num [1:22] 9.6 9.6 9.6 9.6 9.6 ...
##  $ pr.mean: num [1:24] 1000 0 0 0 0 0 0 0 0 0 ...
##  $ pr.var : num [1:24] 1e+06 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 ...
##  $ prop   : num [1:24] 29.9 29.9 29.9 29.9 29.9 ...
##  $ sample : num 100
## Starting MCMC...
## Done MCMC.

## Control parameters (set by user or default):
## List of 7
##  $ sig2   : num 187441
##  $ a      : num [1, 1] 8.41
##  $ y      : num [1:24] 8.41 8.41 8.41 8.41 8.41 ...
##  $ pr.mean: num [1:26] 1000 0 0 0 0 0 0 0 0 0 ...
##  $ pr.var : num [1:26] 1e+06 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 ...
##  $ prop   : num [1:26] 45 45 45 45 45 ...
##  $ sample : num 100
## Starting MCMC...
## Done MCMC.

## Control parameters (set by user or default):
## List of 7
##  $ sig2   : num 80698
##  $ a      : num [1, 1] 4.35
##  $ y      : num [1:24] 4.35 4.35 4.35 4.35 4.35 ...
##  $ pr.mean: num [1:26] 1000 0 0 0 0 0 0 0 0 0 ...
##  $ pr.var : num [1:26] 1e+06 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 ...
##  $ prop   : num [1:26] 19.4 19.4 19.4 19.4 19.4 ...
##  $ sample : num 100
## Starting MCMC...
## Done MCMC.

## The following taxa are missing from the fastAnc and anc.Bayes analyses for continuous Character QT 7 
##  Bubbia_semecarpoides Bubbia_whiteana Zygogynum_pancheri Zygogynum_acsmithii
## Control parameters (set by user or default):
## List of 7
##  $ sig2   : num 116
##  $ a      : num [1, 1] 1.06
##  $ y      : num [1:20] 1.06 1.06 1.06 1.06 1.06 ...
##  $ pr.mean: num [1:22] 1000 0 0 0 0 0 0 0 0 0 ...
##  $ pr.var : num [1:22] 1e+06 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 1e+03 ...
##  $ prop   : num [1:22] 0.0278 0.0278 0.0278 0.0278 0.0278 ...
##  $ sample : num 100
## Starting MCMC...
## Done MCMC.

Phylomorphospace

A few phylomorphospace plots. Note the lack of clear patterns. Stand-alone, each individual trait has little phylogenetic correlation at the supergeneric level (all traits are homoplasious and likely to evolve in parallel/convergently). Nonetheless, size trends can be seen for generic lineages.

ancThresh

The ancThresh() function uses Bayesian MCMC to estimate ancestral states and thresholds for a discrete character under the threshold model from quantitative genetics (Felsenstein 2012). Number of generations: 200,000 Sampling: 1,000 Burnin: 50,000

## Application note: Time-consumptive
#do run once RUN ONCE - RUN AGAIN IF YOU UPDATE THE CHARACTER MATRICES
# Run again by setting rerun equal to TRUE. Run the code between the (rerun) {} - go get some coffee. this will take a while.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
rerun <- FALSE

if (rerun) {
ngenx <- 200000
nsamx <- 1000
# ngenx <- 20000 # Testing
# nsamx <- 100 # Testing
RES <- NULL
for (iChar in 1:ncol(morphQual)) {
  res <- list()
  res$ngen <- ngenx
  res$sam <- nsamx
  res$char.orig <- morphQual[,iChar]
  names(res$char.orig) <- rownames(morphQual)
  res$missing <- names(res$char.orig)[is.na(res$char.orig)]
  res$char.final <- res$char.orig[!is.na(res$char.orig)]
  res$nChar <- length(unique(res$char.final))
  res$x <- letters[as.numeric(res$char.final)+1]
  names(res$x) <- names(res$char.final)
  if (length(res$missing)>0) {
    res$tree <-drop.tip(wtree,res$missing)
  } else {
    res$tree <- wtree
  }
  colors<-setNames(rainbow(length(unique(res$x))),letters[1:res$nChar])
  res$mcmc <- ancThresh(res$tree,res$x,ngen=ngenx,control=list(sample=nsamx,piecol=colors),model="OU",plot=T)
  mtext(iChar,side=2,line=-3,cex=2,las=2)
  RES[[iChar]] <- res
  }
save(RES,file="ancThresh.runs.Rdata")
}

load("ancThresh.runs.Rdata") # Loads RES

#res$mcmc <- ancThresh(drop.tip(wtree,res$missing),res$x,ngen=ngenx,control=list(sample=nsamx),model="OU",plot=F)

burnin<-50000 
#burnin<-500 # quicktest

# plotThresh()  

for (iChar in 1:length(RES)) {
  res <- RES[[iChar]]
  colors<-setNames(rainbow(length(unique(res$x))),letters[1:res$nChar])
  ii<-which(res$mcmc$par[,"gen"]==burnin)
  #plotThresh(tree=wtree,x=x,mcmc=res.mcmc,burnin=50000,piecol=colors,align.tip.label=T)
  #par(mfrow=c(1,2))
  #plotThresh(res$tree,x=res$x,mcmc = res$mcmc,piecol=colors,tipcol="input")
  #plotThresh(res$tree,x=res$x,mcmc = res$mcmc,piecol=colors,tipcol="input")
  plot.phylo(res$tree,label.offset = 0.002)
  tipcols <- colors[res$x[ res$tree$tip.label]]
  names(tipcols) <- res$tree$tip.label
  tiplabels(pch=22, bg=colors[res$x[ res$tree$tip.label]],cex=1.5)
  
  #XX<-t(apply(res$mcmc$mcmc[ii:nrow(res$mcmc$mcmc),],2,function(f) table(f)))
  #summary(factor(x,levels=letters[1:res$nChar]))/(nrow(res$mcmc$mcmc)-ii+1)))
  XX<-t(apply(res$mcmc$mcmc[ii:nrow(res$mcmc$mcmc),],2,function(x) 
  summary(factor(x,levels=letters[1:res$nChar]))/(nrow(res$mcmc$mcmc)-ii+1)))
  nodelabels(pie=XX,piecol=colors,cex=0.5)
  #legend("bottomleft",legend=(sort(unique(res$char.final))),fill = colors)
  legend("bottomleft",legend=qcs[[iChar]],fill = colors,title=names(qcs)[iChar])
  mtext(qlnames[iChar],side=3,line=0,cex=2,las=1)
}