Load packages

rm(list = ls())

# unload all non-based packages
out <- sapply(paste('package:', names(sessionInfo()$otherPkgs), sep = ""), function(x) try(detach(x, unload = FALSE, character.only = TRUE), silent = T))

## add 'developer/' to packages to be installed from github
x <- c(
  "ggtree",
  "ape",
  "viridis",
  "readxl",
  "ggplot2",
  "leaflet",
   "phangorn",
   "parallel",
   "pbapply",
    "cowplot",
    "coda"
    )
  
out <- lapply(x, function(y) {
  # get pakage name
  pkg <- strsplit(y, "/")[[1]]
  pkg <- pkg[length(pkg)]
  
  # check if installed, if not then install
  if (!pkg %in% installed.packages()[, "Package"])  {
  if (grepl("/", y))
  devtools::install_github(y, force = TRUE)
  else
  install.packages(y)
  }
  
  # load package
  try(require(pkg, character.only = T), silent = T)
})

knitr::opts_knit$set(root.dir = normalizePath(".."))

knitr::opts_chunk$set(dpi = 58, fig.width = 20, fig.height = 15)

Set parameters, read data and make base ggtree plots

#colors for plots
cols <- viridis(10, alpha = 0.7)

# font size of clade labels
fontsize <- 13

# edge width
lwd <- 2

# functions to get descendant nodes
select.tip.or.node <- function(element, tree) {
    ifelse(element < Ntip(tree)+1, tree$tip.label[element], tree$node.label[element-Ntip(tree)])
}

get_edge_table <- function(phy){
  
    phy$node.label <- paste0("node", seq_len(Ntip(phy)))

    edge_table <- data.frame(
                "parent" = phy$edge[,1],
                "par.name" = sapply(phy$edge[,1], select.tip.or.node, tree = phy),
                "child" = phy$edge[,2],
                "chi.name" = sapply(phy$edge[,2], select.tip.or.node, tree = phy),
                length = phy$edge.length
                )
return(edge_table)
}

# make any numeric variable within a specific range 
range01 <- function(x, min = 0, max = 1){
  x <- x + min(x)
  x <- (x-min(x))/(max(x)-min(x))

  x <- x * (max - min)
  
  x <- x + min
  
  return(x)
  }

# ggplot2 theme
theme_set(theme_classic(base_size = 14,base_family = "Arial"))

# main tree
phy <- read.nexus("data/processed/trees/ConsensoBayesTraits.trees")

phy <- rotateConstr(phy = phy, constraint = rev(phy$tip))

phy <- ladderize(phy)


# read branchwise rates
xls <- list.files(path = "./output/bayestraits", full.names = TRUE, pattern = "\\.xls")

# get branchwise rates into a list
branchwise_rates <- lapply(xls, function(x) {

  X <- as.data.frame(read_excel(x))
  
  X$variable <- gsub("BranchWiseRate_|BranchWiseRates_|.xlsx", "", basename(x))
  
  # get the ones with r == 1 (r != 1 means significantly different than background rate)
  X$log.r <- log(X$`Median Scalar (r)` + 1)

  names(X)[7] <- "Optimized Rate (r * backgroundrate)"

  domain <- range(0, 1)
  
  
  pal_fun <- colorNumeric(palette = "viridis", domain = domain, reverse = TRUE)
  
    # previewColors(pal_fun, seq(min(domain), max(domain), length.out = 30))

  X$r.for.color <- NA
  
  # color range for low rates
  X$r.for.color[X$`Optimized Rate (r * backgroundrate)`< X$`Background Rate (per million year)`] <- range01(X$log.r[X$`Optimized Rate (r * backgroundrate)`< X$`Background Rate (per million year)`], min = 0.7, max = 0.8)
  
  # color range for high rates
  X$r.for.color[X$`Optimized Rate (r * backgroundrate)`> X$`Background Rate (per million year)`] <- range01(X$log.r[X$`Optimized Rate (r * backgroundrate)`> X$`Background Rate (per million year)`], min = 0.1, max = 0.5)
 
  X$rate.color <- "gray"
  
  X$rate.color <- pal_fun(X$r.for.color)
  
  # get gray the ones with less than 95% of time under background rate 
  X$rate.color[X$`Percentage time scaled`< 95] <- "gray"
  
  return(X)
  })

# names(branchwise_rates) <-  gsub("BranchWiseRate_|BranchWiseRates_|.xlsx", "", basename(xls))

names(branchwise_rates) <- c("Sequence complexity", "# of element types", "Acoustic space", "Between song variation")

# get background rates per parameter
background_rates <- sapply(branchwise_rates, function(x)
  mean(x$`Background Rate (per million year)`))

log_background_rates <- log(background_rates + 1)


# put branchwise rates in a single data frame

phy.btw.sng.vr <- drop.tip(phy, tip = setdiff(phy$tip.label, branchwise_rates$`Between song variation`$`Taxa List`[branchwise_rates$`Between song variation`$`No Taxa` == 1]))

# ggplot2 theme
theme_set(theme_tree2(base_size = 14,base_family = "Arial"))

# song acoustic parameters
song.acoust.param <- read.csv("data/processed/acoustic/Song parameters nightjars swifts and hummingbirds.csv", stringsAsFactors = FALSE)

song.acoust.param$species <- song.acoust.param$species_uscr

sp.clade <- song.acoust.param[!duplicated(song.acoust.param$species) , c("species", "Clade")]

# rename hummers
sp.clade$maj.clade <- ifelse(sp.clade$Clade %in% c("Nightjars", "Swifts"), sp.clade$Clade, "Hummingbirds")

# all clades Except for Patagona
all.clade.nodes <- sapply(c("Emeralds", "Coquettes", "Swifts", "Brilliants", "Mangoes", "Hermits", "Nightjars", "Bees", "Mtn. Gems", "Topazes", "Hummingbirds"), function(x) getMRCA(phy = phy, tip = if(x == "Hummingbirds") sp.clade$species[!sp.clade$Clade %in% c("Swifts", "Nightjars")] else sp.clade$species[sp.clade$Clade == x]))


# 3 major clades
maj.clade.nodes <- sapply(c("Swifts", "Nightjars", "Hummingbirds"), function(x) getMRCA(phy = phy, tip = if(x == "Hummingbirds") sp.clade$species[!sp.clade$Clade %in% c("Swifts", "Nightjars")] else sp.clade$species[sp.clade$Clade == x]))

sp.clade.bsv  <- sp.clade[sp.clade$species %in%  phy.btw.sng.vr$tip.label, ]

# 3 major clades for between song variation
maj.clade.nodes.bsv <- sapply(c("Swifts", "Nightjars", "Hummingbirds"), function(x) getMRCA(phy = phy.btw.sng.vr, tip = if(x == "Hummingbirds") sp.clade.bsv$species[!sp.clade.bsv$Clade %in% c("Swifts", "Nightjars")] else sp.clade.bsv$species[sp.clade.bsv$Clade == x]))

# all clades Except for Patagona for between song variation
all.clade.nodes.bsv <- sapply(c("Emeralds", "Coquettes", "Swifts", "Brilliants", "Mangoes", "Hermits", "Nightjars", "Bees", "Mtn. Gems", "Topazes", "Hummingbirds"), function(x) getMRCA(phy = phy.btw.sng.vr, tip = if(x == "Hummingbirds") sp.clade.bsv$species[!sp.clade.bsv$Clade %in% c("Swifts", "Nightjars")] else sp.clade.bsv$species[sp.clade.bsv$Clade == x]))

# MRCAs for major clades
major.clade.nodes <- sapply(c("Nightjars", "Swifts", "Hummingbirds"), function(x) getMRCA(phy = phy, tip = sp.clade$species[sp.clade$maj.clade == x]))

# MRCAs for major clades for between song variation
major.clade.nodes.bsv <- sapply(c("Nightjars", "Swifts", "Hummingbirds"), function(x) getMRCA(phy = phy.btw.sng.vr, tip = sp.clade.bsv$species[sp.clade.bsv$maj.clade == x]))

# base ggtree graph major clades  
gg.all.clade <- ggtree(phy, ladderize = FALSE, size = lwd) +
  theme_tree2() +  xlim(NA, 130) + 
  theme(text=element_text(size = 35)) + labs(x = "Millions of years")
  
# add clade labels
    for(i in 1:length(all.clade.nodes))
  gg.all.clade <- gg.all.clade + geom_cladelabel(node = all.clade.nodes[i], label = names(all.clade.nodes)[i],   fontsize = fontsize, align=TRUE, barsize = 2, offset = if(names(all.clade.nodes)[i] == "Hummingbirds") 33 else 0.7, color = if(names(all.clade.nodes)[i] == "Hummingbirds") "gray81" else "gray45")
  
# base ggtree graph major clades for between song variation
gg.all.clade.bsv <- ggtree(phy.btw.sng.vr, ladderize = FALSE, size = lwd) +
  theme_tree2() + xlim(NA, 130) + 
  theme(text=element_text(size = 35))

# add labels
    for(i in 1:length(all.clade.nodes.bsv))
  gg.all.clade.bsv <- gg.all.clade.bsv + geom_cladelabel(node = all.clade.nodes.bsv[i], label = names(all.clade.nodes.bsv)[i],   fontsize = fontsize, align=TRUE, barsize = 2, offset = if(names(all.clade.nodes.bsv)[i] == "Hummingbirds") 33 else 0.7, color = if(names(all.clade.nodes.bsv)[i] == "Hummingbirds") "gray81" else "gray45")


# tree with 10 tips for the all clades except Patagona
all.clade.collapse.tree <- drop.tip(phy, tip = c(setdiff(phy$tip.label, sp.clade$species[!duplicated(sp.clade$Clade)]), "Patagona_gigas"))

# rename tips
all.clade.collapse.tree$tip.label <- c("Nightjars", "Swifts", "Hermits", "Topazes", "Mangoes", "Emeralds", "Mtn. Gems", "Bees", "Brilliants", "Coquettes")

# base ggtree graph all clades  
gg.all.clade.collapse.tree <- ggtree(all.clade.collapse.tree, ladderize = FALSE, size = lwd) +
  theme_tree2() + 
  theme(text=element_text(size = 35)) +
    geom_hilight(node = 13, fill = viridis(10)[8]) + # add hummingbird clade label
  geom_label(aes(label = label), hjust = 0.8, size = 10) # add labels to tips
  
# tree with 3 tips for the 3 major clades
major.clade.collapse.tree <- drop.tip(phy, tip = setdiff(phy$tip.label, c("Phaethornis_guy", "Nyctidromus_albicollis","Streptoprocne_zonaris")))

# rename tips
major.clade.collapse.tree$tip.label <- c("Nightjars", "Swifts", "Hummingbirds")

# base ggtree graph major clades  
gg.major.clade.collapse.tree <- ggtree(major.clade.collapse.tree, ladderize = FALSE) +
  theme_tree2() + 
  theme(text=element_text(size = 35)) + 
  geom_label(aes(label = label), hjust = 0.8, size = 10) 


### all nodes for each clade
all.nodes <- lapply(all.clade.nodes, function(x) 
        c(x, allDescendants(x = phy)[[x]]))

all.nodes.bsv <- lapply(all.clade.nodes.bsv, function(x) 
        c(x, allDescendants(x = phy.btw.sng.vr)[[x]]))

### all nodes for each of 3 major clade
maj.3.nodes <- lapply(maj.clade.nodes, function(x) 
        c(x, allDescendants(x = phy)[[x]]))

maj.3.nodes.bsv <- lapply(maj.clade.nodes.bsv, function(x) 
        c(x, allDescendants(x = phy.btw.sng.vr)[[x]]))

Branchwhise evolutionary rates on song complexity parameters

Blueish colors: significantly faster than background brownian motion rates
Greenish colors: significantly slower than background brownian motion rates
Significance determined as speding >= 95% of time at a rate higher than background

ggtrs <- lapply(1:length(branchwise_rates), function(y){
  
  X <- branchwise_rates[[y]]
  
    X$node <- sapply(X$`Taxa List`, function(y) {
    
    tips <- strsplit(y, split = ",")[[1]]
    
    phytemp <- if (X$variable[1] == "Btwn") phy.btw.sng.vr else phy
    
    node <- if (length(tips) == 1) which(phytemp$tip == tips) else
    getMRCA(phy = phytemp, tip = tips)
  })
  
    if (X$variable[1] != "Btwn")
  gg.all.clade %<+% X + aes(color=I(rate.color))  else
    gg.all.clade.bsv %<+% X + aes(color=I(rate.color))
  # + ggtitle(X$variable[1])
  
})

names(ggtrs) <- names(branchwise_rates)

Sequence complexity

ggtrs$`Sequence complexity`

Acoustic space

ggtrs$`Acoustic space`

Between song variation

ggtrs$`Between song variation`

Number of element types per song

ggtrs$`# of element types`

Evolutionary rate more heterogeneous in hummingbirds, and close to background brownian motion rate in outgroups except for “Between song variation” that evolved slower mostly in outgroups and a bit in hermits
Hermits (or a groups within hermits) tend to evolve slower in all four parameters
Acoustic space shows the highest contrasts between hummingbirds and the two outgroups
Overall complexity in bee hummingbirds has evolved faster than in other hummingbirds except between song variation

Posterior evolutionary rates per clade

# read branchwise rates
trees <- list.files(path = "./output/bayestraits", full.names = TRUE, pattern = "\\.trees$")

rate.trees <- lapply(trees, read.nexus)

names(rate.trees) <- c("Acoustic space", "Between song variation", "Sequence complexity", "# of element types")

# get mean posterior rates per parameter 
post_rates_l <- pblapply(1:length(trees), cl = detectCores() - 1, function(y){
  
  # extract 1 multiphylo with branch length representing amount of change 
  rate.tree <- rate.trees[[y]]

  # get unstransformed tree edge lengths
 if (names(rate.trees)[y] != "Between song variation")
  edge_tab <- get_edge_table(phy) else
    edge_tab <- get_edge_table(phy.btw.sng.vr)
    # edge_tab1 <- get_edge_table(drop.tip(phy, tip = setdiff(phy$tip.label, rate.tree[[1]]$tip.label)))
  
  node.list <- if(names(rate.trees)[y] != "Between song variation") all.nodes else all.nodes.bsv
  
  # loop over each sub tree to get rates for each branch x tree
  rates <- lapply(rate.tree, function(z){
      
    # edge tab for transformed
    edge_tab_rates <- get_edge_table(z)   

    # change rate column name
    names(edge_tab_rates)[5] <- "rate.length"
    
    # merge unstransformed and transformed 
    mrg_edge_tab <- merge(edge_tab, edge_tab_rates[, c("parent", "child", "rate.length")])
    
    # calculate rate
    mrg_edge_tab$rate <- mrg_edge_tab$rate.length / mrg_edge_tab$length  
    
    })

  # put results in a  matrix
  rate.mat <- do.call(cbind, rates)
  
  # make temporary edge table to get parent numbers
  mrg_edge_tab_temp <- merge(edge_tab, get_edge_table(rate.tree[[1]])[, c("parent", "child", "chi.name")])
  
  # include parent label
  rate.mat <- cbind(mrg_edge_tab_temp$parent, rate.mat)

  # get mean rate across trees for each clade
  rates.by.clade <- lapply(1:length(node.list), function(w)
    data.frame(clade = names(node.list)[w], rate = apply(rate.mat[rate.mat[,1] %in% node.list[[w]], ], 2, mean)))
  
  rates.by.clade.df <- do.call(rbind, rates.by.clade)

  rates.by.clade.df$variable <- names(rate.trees)[y]

  return(rates.by.clade.df)
  })

# put all together in a single data frame
post_rates <- do.call(rbind, post_rates_l)

post_rates$log.rate <- log(post_rates$rate + 1)

# save as RDS
saveRDS(post_rates, "./data/processed/posterior_evolutionary_rates_per_clade.RDS")

# read branchwise rates
trees <- list.files(path = "./output/bayestraits", full.names = TRUE, pattern = "\\.trees$")

rate.trees <- lapply(trees, read.nexus)

names(rate.trees) <-  c("Acoustic space", "Between song variation", "Sequence complexity", "# of element types")

# get mean posterior rates per parameter 
post_rates_l <- pblapply(1:length(trees), cl = detectCores() - 1, function(y){
  
  # extract 1 multiphylo with branch length representing amount of change 
  rate.tree <- rate.trees[[y]]

  # get unstransformed tree edge lengths
 if (names(rate.trees)[y] != "Between song variation")
  edge_tab <- get_edge_table(phy) else
    edge_tab <- get_edge_table(phy.btw.sng.vr)
  
  node.list <- if(names(rate.trees)[y] != "Between song variation") maj.3.nodes else maj.3.nodes.bsv
  
  # loop over each sub tree to get rates for each branch x tree
  rates <- lapply(rate.tree, function(z){
      
    # edge tab for transformed
    edge_tab_rates <- get_edge_table(z)   

    # change rate column name
    names(edge_tab_rates)[5] <- "rate.length"
    
    # merge unstransformed and transformed 
    mrg_edge_tab <- merge(edge_tab, edge_tab_rates[, c("parent", "child", "rate.length")])
    
    # calculate rate
    mrg_edge_tab$rate <- mrg_edge_tab$rate.length / mrg_edge_tab$length  
    })

  # put results in a  matrix
  rate.mat <- do.call(cbind, rates)
  
   # make temporary edge table to get parent numbers
  mrg_edge_tab_temp <- merge(edge_tab, get_edge_table(rate.tree[[1]])[, c("parent", "child", "chi.name")])
  
  # include parent label
  rate.mat <- cbind(mrg_edge_tab_temp$parent, rate.mat)

  # get mean rate across trees for each clade
  rates.by.clade <- lapply(1:length(node.list), function(w)
    data.frame(clade = names(node.list)[w], rate = apply(rate.mat[rate.mat[,1] %in% node.list[[w]], ], 2, mean)))
  
  rates.by.clade.df <- do.call(rbind, rates.by.clade)

  rates.by.clade.df$variable <- names(rate.trees)[y]

  return(rates.by.clade.df)
  })

# put all together in a single data frame
post_rates <- do.call(rbind, post_rates_l)

post_rates$log.rate <- log(post_rates$rate + 1)

# save as RDS
saveRDS(post_rates, "./data/processed/posterior_evolutionary_rates_per_clade_3_major_clades.RDS")

post_rates <- readRDS("./data/processed/posterior_evolutionary_rates_per_clade.RDS")

  post_rates$maj.clade <- ifelse(post_rates$clade %in% c("Swifts", "Nightjars"), post_rates$clade, "Hummingbirds")
  
post_rates_3_clades <- readRDS("./data/processed/posterior_evolutionary_rates_per_clade_3_major_clades.RDS")


# for plot have clade as species
post_rates_3_clades$species <- post_rates_3_clades$clade
post_rates$species <- post_rates$clade

gg.rates <- lapply(unique(post_rates$variable), function(w){

  # all clades
  gg.post.rate <- gg.all.clade.collapse.tree
 
  # get data subset
  dat <- post_rates[post_rates$variable == w, ]
  
  # keep only data within 95% quantile
  dat.l <- lapply(unique(dat$clade), function(x){
    
    X <- dat[dat$clade == x, ] 
  # if (x != "Nightjars")
  # {
    Z <- X$log.rate
    class(Z) <- "mcmc"  
    hpd <- HPDinterval(Z)
    Y <- X[X$log.rate > hpd[1] & X$log.rate < hpd[2], ] 
  # } 
  # else Y <- X
  # 
  return(Y)
  
        # if (x == "Nightjars" & X$variable[1] == "Between song variation") return(X) else
    })
  

  dat <- do.call(rbind, dat.l)
  
  # add facet violin
  gg.post.rate <- facet_plot(gg.post.rate, data = dat, geom = geom_violin, aes(x= log.rate, group = label, fill = maj.clade), panel = paste("Log", w, "rates")) +
      scale_fill_manual(values = cols[c(5, 3, 8)]) + 
    theme(legend.position = "none") + geom_vline(xintercept = log_background_rates[w], lty = 2, size = 1.5, col = c("transparent", "black"))

     
   # 3 major clades
  gg.post.rate.3.maj.clades <- gg.major.clade.collapse.tree
 
  # get data subset
  dat <- post_rates_3_clades[post_rates_3_clades$variable == w, ]
  
  # keep only data within 95% quantile
  dat.l <- lapply(unique(dat$clade), function(x){
    
    X <- dat[dat$clade == x, ] 
    
    try(Y <- X[X$log.rate > quantile(X$log.rate, 0.025) & X$log.rate < quantile(X$log.rate, 0.975), ], silent = TRUE)
  
        if (x == "Nightjars" & X$variable[1] == "Between song variation") return(X) else
    return(Y)
    })
  

  dat <- do.call(rbind, dat.l)
  
  # add facet violin
  gg.post.rate.3.maj.clades <- facet_plot(gg.post.rate.3.maj.clades, data = dat, geom = geom_violin, aes(x= log.rate, group = label, fill = label), panel = paste("Log", w, "rates"))  +
        scale_fill_manual(values = cols[c(8, 3, 5)]) + 
    theme(legend.position = "none") +
    geom_vline(xintercept = log_background_rates[w], lty = 2, size = 1.5,  col = c("transparent", "black"))

  
 # # plot both together
  cow_plt <- plot_grid(gg.post.rate.3.maj.clades, gg.post.rate, ncol = 2)
  
  return(cow_plt)
  })

names(gg.rates) <- unique(post_rates$variable)

Sequence complexity

gg.rates$`Sequence complexity`

Acoustic space

gg.rates$`Acoustic space`

Between song variation

gg.rates$`Between song variation`

Number of elements types per song

gg.rates$`# of element types`

Overall, evolutionary rates tend to be higher in hummingbirds, with some important differences between hummingbird sub-clades
Mountain gems show the highest evolutionary rates among all clades
Nightjars and swifts very similar except for “number of element types”

Posterior evolutionary correlation between complexity parameters

As a metric of phenotypic integration

(should we use phylogenetically control regressions?)

# proof of concept that trimming file share node labels
# ac_sp_tr <- rate.trees$`Acoustic space`[[1]]
# 
# btwn_tr <- rate.trees$`Between song variation`[[1]]
# 
# prn_ac_sp_tr <- drop.tip(ac_sp_tr, tip = setdiff(ac_sp_tr$tip.label, btwn_tr$tip.label))
# 
# jpeg("trim_phylo.jpeg", width = 1000, height = 5000, pointsize = 20)
#   par(mar = rep(0, 4), mfrow = c(1, 2))
#   plot.phylo(btwn_tr, show.tip.label = FALSE, use.edge.length = FALSE)
#   nodelabels()
#   
#   plot.phylo(prn_ac_sp_tr, show.tip.label = FALSE, use.edge.length = FALSE)
#   nodelabels()
#   dev.off()



# read branchwise rates
trees <- list.files(path = "./output/bayestraits", full.names = TRUE, pattern = "\\.trees$")

rate.trees <- lapply(trees, read.nexus)

names(rate.trees) <-  c("Acoustic space", "Between song variation", "Sequence complexity", "# of element types")

rate.trees <- rate.trees[c(1, 3, 4, 2)]

# make combinations of 2 parameters at the time
grd_trees <- t(combn(names(rate.trees), 2))

# get get correlation between complexity 
evo_cors_l <- pblapply(1:nrow(grd_trees), cl = detectCores() - 1, function(y){
  
  # extract 1 bayes traits multiphylo with branch length representing amount of change 
  bt_tr1 <- rate.trees[[grd_trees[y, 1]]]
  bt_tr2 <- rate.trees[[grd_trees[y, 2]]]
  
    # prune tree if different ntips
 if (Ntip(bt_tr1[[1]]) != Ntip(bt_tr2[[1]]))
  {
    bt_tr1 <- lapply(bt_tr1, drop.tip, tip = setdiff(bt_tr1[[1]]$tip.label, bt_tr2[[1]]$tip.label))

  # get node list depending on number of species
    node.list <- all.nodes.bsv
    
     # get unstransformed tree edge lengths
    edge_tab <- get_edge_table(phy.btw.sng.vr)
    # edge_tab <- get_edge_table(drop.tip(phy, tip = setdiff(phy$tip.label, phy.btw.sng.vr$tip.label)))
    
    } else { # if both trees have 318 species
     # get unstransformed tree edge lengths
    edge_tab <- get_edge_table(phy)
   
    # node list
   node.list <- all.nodes
    }
  
  # make edge rate table template
  edge_tab_templ <- get_edge_table(bt_tr1[[1]])   

  # change rate column name
  names(edge_tab_templ)[5] <- "delete"
  
  # merge unstransformed and transformed 
  edge_tab_templ <- merge(edge_tab, edge_tab_templ[, c("parent", "child", "delete")])
  
  edge_tab_templ$delete <- NULL
  
  # loop over each sub tree to get rates for each branch x tree
  rates_tr1 <- sapply(bt_tr1, function(z){
      
  # edge tab for transformed
  edge_tab_rates <- get_edge_table(z)   

  # change rate column name
  names(edge_tab_rates)[5] <- "rate.length"
  
  # merge unstransformed and transformed 
  mrg_edge_tab <- merge(edge_tab_templ, edge_tab_rates[, c("parent", "child", "rate.length")])
  
  # calculate rate
  mrg_edge_tab$rate <- mrg_edge_tab$rate.length / mrg_edge_tab$length   
  
  return(mrg_edge_tab$rate) 
  })

  # rates second tree
  rates_tr2 <- sapply(bt_tr2, function(z){
      
  # edge tab for transformed
  edge_tab_rates <- get_edge_table(z)   

  # change rate column name
  names(edge_tab_rates)[5] <- "rate.length"
  
  # merge unstransformed and transformed 
  mrg_edge_tab <- merge(edge_tab_templ, edge_tab_rates[, c("parent", "child", "rate.length")])
  
  # calculate rate
  mrg_edge_tab$rate <- mrg_edge_tab$rate.length / mrg_edge_tab$length  
  
  return(mrg_edge_tab$rate)
  })
  
 # sample 1000 tree values randomly for each tree
 # ang get correlation of log rates
  cors_clade_l <- lapply(1:length(node.list), function(w)
  {
  
    # select rates for each clade based on nodes
    rt1 <- rates_tr1[edge_tab_templ$parent %in% node.list[[w]], ]
    rt2 <- rates_tr2[edge_tab_templ$parent %in% node.list[[w]], ]
    
    cors <- sapply(1:10000, function(x)
     cor(log(rt1[, sample(1:ncol(rt1), 1), drop = TRUE] + 1), log(rt2[, sample(1:ncol(rt2), 1), drop = TRUE] + 1))
       )
    
    return(data.frame(clade = names(node.list)[w], cor = cors))
    })
  
  cors_clade <- do.call(rbind, cors_clade_l)
  
  # names(cors_clade) <- names(node.list)
  
  cors_df <- data.frame(clade = cors_clade$clade, param1 = grd_trees[y, 1], param2 = grd_trees[y, 2], cor = cors_clade$cor)
  
  return(cors_df)
  })

# put all together in a single data frame
evo_cors <- do.call(rbind, evo_cors_l)


# save as RDS
saveRDS(evo_cors, "./data/processed/posterior_evolutionary_rate_correlations.RDS")

evo_cors <- readRDS("./data/processed/posterior_evolutionary_rate_correlations.RDS")

evo_cors$variable.pair <- paste0(evo_cors$param1, "-", evo_cors$param2)

evo_cors_3_clades <- evo_cors[evo_cors$clade %in% c("Nightjars", "Swifts", "Hummingbirds"), ]

# remove hummingbirds
evo_cors <- evo_cors[evo_cors$clade != "Hummingbirds", ]

# for plot have clade as species
evo_cors_3_clades$species <- as.character(evo_cors_3_clades$clade)
evo_cors$species <- as.character(evo_cors$clade)

evo_cors$maj.clade <- as.character(ifelse(evo_cors$species %in% c("Nightjars", "Swifts"), evo_cors$species, "Hummingbirds"))


gg_cor_rates <- lapply(c(unique(evo_cors$variable.pair), "overall"), function(w){

  # all clades
  gg.cor.rate <- gg.all.clade.collapse.tree + 
    geom_vline(xintercept = 0, lty = 2, size = 1.5,  col = c("transparent", "black"))
 
  # get data subset
  if (w != "overall")
  dat <- evo_cors[evo_cors$variable.pair == w, ] else
    dat <- evo_cors
  
  # keep only data within 95% quantile
  dat.l <- lapply(unique(dat$species), function(x){
    
    X <- dat[dat$species == x, ] 
    Z <- X$cor
  class(Z) <- "mcmc"  
  hpd <- HPDinterval(Z)
    
  Y <- X[X$cor > hpd[1] & X$cor < hpd[2], ]  
  return(Y)
    })
  
  dat <- do.call(rbind, dat.l)
  
  # add facet violin
  gg.cor.rate <- facet_plot(gg.cor.rate, data = dat, geom = geom_violin, aes(x= cor, group = label, fill = maj.clade), panel = w) +
      scale_fill_manual(values = cols[c(8, 3, 5)]) +
    theme(legend.position = "none") 

     
   # 3 major clades
  gg.cor.rate.3.maj.clades <- gg.major.clade.collapse.tree + geom_vline(xintercept = 0, lty = 2, size = 1.5, col = c("transparent", "black"))
 
  # get data subset
    if (w != "overall")
  dat <- evo_cors_3_clades[evo_cors_3_clades$variable.pair == w, ] else
    dat <- evo_cors_3_clades
  
  # keep only data within 95% quantile
  dat.l <- lapply(unique(dat$clade), function(x){
    
      X <- dat[dat$clade == x, ] 
      Z <- X$cor
      class(Z) <- "mcmc"  
      hpd <- HPDinterval(Z)
      Y <- X[X$cor > hpd[1] & X$cor < hpd[2], ]  
      return(Y)
        })
  

  dat <- do.call(rbind, dat.l)
  
  # add facet violin
  gg.cor.rate.3.maj.clades <-
    facet_plot(gg.cor.rate.3.maj.clades, data = dat, geom = geom_violin, aes(x= cor, group = label, fill = label), panel = w)  +
        scale_fill_manual(values = cols[c(8, 3, 5)]) + 
    theme(legend.position = "none") 
  
 # # plot both together
  cow_plt <- plot_grid(gg.cor.rate.3.maj.clades, gg.cor.rate, ncol = 2)
  
  return(cow_plt)
  })

names(gg_cor_rates) <- c(unique(evo_cors$variable.pair), "overall")

Acoustic space vs Sequence complexity

gg_cor_rates$`Acoustic space-Sequence complexity`

Acoustic space vs Number of element types

gg_cor_rates$`Acoustic space-# of element types`

Acoustic space vs Between song variation

gg_cor_rates$`Acoustic space-Between song variation`

Sequence complexity vs Number of element types

gg_cor_rates$`Sequence complexity-# of element types`

Sequence complexity vs Between song variation

gg_cor_rates$`Sequence complexity-Between song variation`

Number of element types vs Between song variation

gg_cor_rates$`# of element types-Between song variation`

Overall correlation between evolutionary rates of all 4 complexity parameters

gg_cor_rates$overall

Nightjars show low integration across all parameter pairs
Hummingbirds and swifts show similar levels of integration and higher than nightjars
However, only hummingbirds show “significant” integration (always higher than 0) for all pairwise comparisons as well as overall integration

Session information

## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=es_ES.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=es_ES.UTF-8    
##  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=es_ES.UTF-8   
##  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] coda_0.19-3       cowplot_1.0.0     pbapply_1.4-2     phangorn_2.5.5   
##  [5] leaflet_2.0.3     ggplot2_3.3.0     readxl_1.3.1      viridis_0.5.1    
##  [9] viridisLite_0.3.0 ape_5.3           ggtree_2.1.3     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.3          lattice_0.20-41     tidyr_1.0.0        
##  [4] assertthat_0.2.1    zeallot_0.1.0       digest_0.6.22      
##  [7] mime_0.9            R6_2.4.1            cellranger_1.1.0   
## [10] backports_1.1.5     evaluate_0.14       pillar_1.4.2       
## [13] rlang_0.4.1         lazyeval_0.2.2      Matrix_1.2-18      
## [16] rmarkdown_1.17      labeling_0.3        stringr_1.4.0      
## [19] htmlwidgets_1.5.1   igraph_1.2.4.1      munsell_0.5.0      
## [22] shiny_1.4.0         compiler_3.6.1      httpuv_1.5.2       
## [25] xfun_0.12           pkgconfig_2.0.3     htmltools_0.4.0    
## [28] tidyselect_0.2.5    tibble_2.1.3        gridExtra_2.3      
## [31] quadprog_1.5-8      crayon_1.3.4        dplyr_0.8.3        
## [34] withr_2.1.2         later_1.0.0         grid_3.6.1         
## [37] nlme_3.1-142        jsonlite_1.6        xtable_1.8-4       
## [40] gtable_0.3.0        lifecycle_0.1.0     magrittr_1.5       
## [43] scales_1.0.0        tidytree_0.3.2      stringi_1.4.6      
## [46] promises_1.1.0      rvcheck_0.1.8       vctrs_0.2.0        
## [49] fastmatch_1.1-0     RColorBrewer_1.1-2  tools_3.6.1        
## [52] treeio_1.10.0       glue_1.3.1          purrr_0.3.3        
## [55] crosstalk_1.0.0     fastmap_1.0.1       yaml_2.2.1         
## [58] colorspace_1.4-1    BiocManager_1.30.10 knitr_1.28

Branchwise rates results

Hummingbird song evolution

Marcelo Araya-Salas, PhD

19-04-2020

Load packages

Set parameters, read data and make base ggtree plots

Branchwhise evolutionary rates on song complexity parameters

Sequence complexity

Acoustic space

Between song variation

Number of element types per song

Posterior evolutionary rates per clade

Sequence complexity

Acoustic space

Between song variation

Number of elements types per song

Posterior evolutionary correlation between complexity parameters

Acoustic space vs Sequence complexity

Acoustic space vs Number of element types

Acoustic space vs Between song variation

Sequence complexity vs Number of element types

Sequence complexity vs Between song variation

Number of element types vs Between song variation

Overall correlation between evolutionary rates of all 4 complexity parameters