rm(list = ls())
# unload all non-based packages
out <- sapply(paste('package:', names(sessionInfo()$otherPkgs), sep = ""), function(x) try(detach(x, unload = FALSE, character.only = TRUE), silent = T))
## add 'developer/' to packages to be installed from github
x <- c(
"ggtree",
"ape",
"viridis",
"readxl",
"ggplot2",
"leaflet",
"phangorn",
"parallel",
"pbapply",
"cowplot",
"coda"
)
out <- lapply(x, function(y) {
# get pakage name
pkg <- strsplit(y, "/")[[1]]
pkg <- pkg[length(pkg)]
# check if installed, if not then install
if (!pkg %in% installed.packages()[, "Package"]) {
if (grepl("/", y))
devtools::install_github(y, force = TRUE)
else
install.packages(y)
}
# load package
try(require(pkg, character.only = T), silent = T)
})
knitr::opts_knit$set(root.dir = normalizePath(".."))
knitr::opts_chunk$set(dpi = 58, fig.width = 20, fig.height = 15)
#colors for plots
cols <- viridis(10, alpha = 0.7)
# font size of clade labels
fontsize <- 13
# edge width
lwd <- 2
# functions to get descendant nodes
select.tip.or.node <- function(element, tree) {
ifelse(element < Ntip(tree)+1, tree$tip.label[element], tree$node.label[element-Ntip(tree)])
}
get_edge_table <- function(phy){
phy$node.label <- paste0("node", seq_len(Ntip(phy)))
edge_table <- data.frame(
"parent" = phy$edge[,1],
"par.name" = sapply(phy$edge[,1], select.tip.or.node, tree = phy),
"child" = phy$edge[,2],
"chi.name" = sapply(phy$edge[,2], select.tip.or.node, tree = phy),
length = phy$edge.length
)
return(edge_table)
}
# make any numeric variable within a specific range
range01 <- function(x, min = 0, max = 1){
x <- x + min(x)
x <- (x-min(x))/(max(x)-min(x))
x <- x * (max - min)
x <- x + min
return(x)
}
# ggplot2 theme
theme_set(theme_classic(base_size = 14,base_family = "Arial"))
# main tree
phy <- read.nexus("data/processed/trees/ConsensoBayesTraits.trees")
phy <- rotateConstr(phy = phy, constraint = rev(phy$tip))
phy <- ladderize(phy)
# read branchwise rates
xls <- list.files(path = "./output/bayestraits", full.names = TRUE, pattern = "\\.xls")
# get branchwise rates into a list
branchwise_rates <- lapply(xls, function(x) {
X <- as.data.frame(read_excel(x))
X$variable <- gsub("BranchWiseRate_|BranchWiseRates_|.xlsx", "", basename(x))
# get the ones with r == 1 (r != 1 means significantly different than background rate)
X$log.r <- log(X$`Median Scalar (r)` + 1)
names(X)[7] <- "Optimized Rate (r * backgroundrate)"
domain <- range(0, 1)
pal_fun <- colorNumeric(palette = "viridis", domain = domain, reverse = TRUE)
# previewColors(pal_fun, seq(min(domain), max(domain), length.out = 30))
X$r.for.color <- NA
# color range for low rates
X$r.for.color[X$`Optimized Rate (r * backgroundrate)`< X$`Background Rate (per million year)`] <- range01(X$log.r[X$`Optimized Rate (r * backgroundrate)`< X$`Background Rate (per million year)`], min = 0.7, max = 0.8)
# color range for high rates
X$r.for.color[X$`Optimized Rate (r * backgroundrate)`> X$`Background Rate (per million year)`] <- range01(X$log.r[X$`Optimized Rate (r * backgroundrate)`> X$`Background Rate (per million year)`], min = 0.1, max = 0.5)
X$rate.color <- "gray"
X$rate.color <- pal_fun(X$r.for.color)
# get gray the ones with less than 95% of time under background rate
X$rate.color[X$`Percentage time scaled`< 95] <- "gray"
return(X)
})
# names(branchwise_rates) <- gsub("BranchWiseRate_|BranchWiseRates_|.xlsx", "", basename(xls))
names(branchwise_rates) <- c("Sequence complexity", "# of element types", "Acoustic space", "Between song variation")
# get background rates per parameter
background_rates <- sapply(branchwise_rates, function(x)
mean(x$`Background Rate (per million year)`))
log_background_rates <- log(background_rates + 1)
# put branchwise rates in a single data frame
phy.btw.sng.vr <- drop.tip(phy, tip = setdiff(phy$tip.label, branchwise_rates$`Between song variation`$`Taxa List`[branchwise_rates$`Between song variation`$`No Taxa` == 1]))
# ggplot2 theme
theme_set(theme_tree2(base_size = 14,base_family = "Arial"))
# song acoustic parameters
song.acoust.param <- read.csv("data/processed/acoustic/Song parameters nightjars swifts and hummingbirds.csv", stringsAsFactors = FALSE)
song.acoust.param$species <- song.acoust.param$species_uscr
sp.clade <- song.acoust.param[!duplicated(song.acoust.param$species) , c("species", "Clade")]
# rename hummers
sp.clade$maj.clade <- ifelse(sp.clade$Clade %in% c("Nightjars", "Swifts"), sp.clade$Clade, "Hummingbirds")
# all clades Except for Patagona
all.clade.nodes <- sapply(c("Emeralds", "Coquettes", "Swifts", "Brilliants", "Mangoes", "Hermits", "Nightjars", "Bees", "Mtn. Gems", "Topazes", "Hummingbirds"), function(x) getMRCA(phy = phy, tip = if(x == "Hummingbirds") sp.clade$species[!sp.clade$Clade %in% c("Swifts", "Nightjars")] else sp.clade$species[sp.clade$Clade == x]))
# 3 major clades
maj.clade.nodes <- sapply(c("Swifts", "Nightjars", "Hummingbirds"), function(x) getMRCA(phy = phy, tip = if(x == "Hummingbirds") sp.clade$species[!sp.clade$Clade %in% c("Swifts", "Nightjars")] else sp.clade$species[sp.clade$Clade == x]))
sp.clade.bsv <- sp.clade[sp.clade$species %in% phy.btw.sng.vr$tip.label, ]
# 3 major clades for between song variation
maj.clade.nodes.bsv <- sapply(c("Swifts", "Nightjars", "Hummingbirds"), function(x) getMRCA(phy = phy.btw.sng.vr, tip = if(x == "Hummingbirds") sp.clade.bsv$species[!sp.clade.bsv$Clade %in% c("Swifts", "Nightjars")] else sp.clade.bsv$species[sp.clade.bsv$Clade == x]))
# all clades Except for Patagona for between song variation
all.clade.nodes.bsv <- sapply(c("Emeralds", "Coquettes", "Swifts", "Brilliants", "Mangoes", "Hermits", "Nightjars", "Bees", "Mtn. Gems", "Topazes", "Hummingbirds"), function(x) getMRCA(phy = phy.btw.sng.vr, tip = if(x == "Hummingbirds") sp.clade.bsv$species[!sp.clade.bsv$Clade %in% c("Swifts", "Nightjars")] else sp.clade.bsv$species[sp.clade.bsv$Clade == x]))
# MRCAs for major clades
major.clade.nodes <- sapply(c("Nightjars", "Swifts", "Hummingbirds"), function(x) getMRCA(phy = phy, tip = sp.clade$species[sp.clade$maj.clade == x]))
# MRCAs for major clades for between song variation
major.clade.nodes.bsv <- sapply(c("Nightjars", "Swifts", "Hummingbirds"), function(x) getMRCA(phy = phy.btw.sng.vr, tip = sp.clade.bsv$species[sp.clade.bsv$maj.clade == x]))
# base ggtree graph major clades
gg.all.clade <- ggtree(phy, ladderize = FALSE, size = lwd) +
theme_tree2() + xlim(NA, 130) +
theme(text=element_text(size = 35)) + labs(x = "Millions of years")
# add clade labels
for(i in 1:length(all.clade.nodes))
gg.all.clade <- gg.all.clade + geom_cladelabel(node = all.clade.nodes[i], label = names(all.clade.nodes)[i], fontsize = fontsize, align=TRUE, barsize = 2, offset = if(names(all.clade.nodes)[i] == "Hummingbirds") 33 else 0.7, color = if(names(all.clade.nodes)[i] == "Hummingbirds") "gray81" else "gray45")
# base ggtree graph major clades for between song variation
gg.all.clade.bsv <- ggtree(phy.btw.sng.vr, ladderize = FALSE, size = lwd) +
theme_tree2() + xlim(NA, 130) +
theme(text=element_text(size = 35))
# add labels
for(i in 1:length(all.clade.nodes.bsv))
gg.all.clade.bsv <- gg.all.clade.bsv + geom_cladelabel(node = all.clade.nodes.bsv[i], label = names(all.clade.nodes.bsv)[i], fontsize = fontsize, align=TRUE, barsize = 2, offset = if(names(all.clade.nodes.bsv)[i] == "Hummingbirds") 33 else 0.7, color = if(names(all.clade.nodes.bsv)[i] == "Hummingbirds") "gray81" else "gray45")
# tree with 10 tips for the all clades except Patagona
all.clade.collapse.tree <- drop.tip(phy, tip = c(setdiff(phy$tip.label, sp.clade$species[!duplicated(sp.clade$Clade)]), "Patagona_gigas"))
# rename tips
all.clade.collapse.tree$tip.label <- c("Nightjars", "Swifts", "Hermits", "Topazes", "Mangoes", "Emeralds", "Mtn. Gems", "Bees", "Brilliants", "Coquettes")
# base ggtree graph all clades
gg.all.clade.collapse.tree <- ggtree(all.clade.collapse.tree, ladderize = FALSE, size = lwd) +
theme_tree2() +
theme(text=element_text(size = 35)) +
geom_hilight(node = 13, fill = viridis(10)[8]) + # add hummingbird clade label
geom_label(aes(label = label), hjust = 0.8, size = 10) # add labels to tips
# tree with 3 tips for the 3 major clades
major.clade.collapse.tree <- drop.tip(phy, tip = setdiff(phy$tip.label, c("Phaethornis_guy", "Nyctidromus_albicollis","Streptoprocne_zonaris")))
# rename tips
major.clade.collapse.tree$tip.label <- c("Nightjars", "Swifts", "Hummingbirds")
# base ggtree graph major clades
gg.major.clade.collapse.tree <- ggtree(major.clade.collapse.tree, ladderize = FALSE) +
theme_tree2() +
theme(text=element_text(size = 35)) +
geom_label(aes(label = label), hjust = 0.8, size = 10)
### all nodes for each clade
all.nodes <- lapply(all.clade.nodes, function(x)
c(x, allDescendants(x = phy)[[x]]))
all.nodes.bsv <- lapply(all.clade.nodes.bsv, function(x)
c(x, allDescendants(x = phy.btw.sng.vr)[[x]]))
### all nodes for each of 3 major clade
maj.3.nodes <- lapply(maj.clade.nodes, function(x)
c(x, allDescendants(x = phy)[[x]]))
maj.3.nodes.bsv <- lapply(maj.clade.nodes.bsv, function(x)
c(x, allDescendants(x = phy.btw.sng.vr)[[x]]))
ggtrs <- lapply(1:length(branchwise_rates), function(y){
X <- branchwise_rates[[y]]
X$node <- sapply(X$`Taxa List`, function(y) {
tips <- strsplit(y, split = ",")[[1]]
phytemp <- if (X$variable[1] == "Btwn") phy.btw.sng.vr else phy
node <- if (length(tips) == 1) which(phytemp$tip == tips) else
getMRCA(phy = phytemp, tip = tips)
})
if (X$variable[1] != "Btwn")
gg.all.clade %<+% X + aes(color=I(rate.color)) else
gg.all.clade.bsv %<+% X + aes(color=I(rate.color))
# + ggtitle(X$variable[1])
})
names(ggtrs) <- names(branchwise_rates)
ggtrs$`Sequence complexity`
ggtrs$`Acoustic space`
ggtrs$`Between song variation`
ggtrs$`# of element types`
Evolutionary rate more heterogeneous in hummingbirds, and close to background brownian motion rate in outgroups except for “Between song variation” that evolved slower mostly in outgroups and a bit in hermits
Hermits (or a groups within hermits) tend to evolve slower in all four parameters
Acoustic space shows the highest contrasts between hummingbirds and the two outgroups
Overall complexity in bee hummingbirds has evolved faster than in other hummingbirds except between song variation
# read branchwise rates
trees <- list.files(path = "./output/bayestraits", full.names = TRUE, pattern = "\\.trees$")
rate.trees <- lapply(trees, read.nexus)
names(rate.trees) <- c("Acoustic space", "Between song variation", "Sequence complexity", "# of element types")
# get mean posterior rates per parameter
post_rates_l <- pblapply(1:length(trees), cl = detectCores() - 1, function(y){
# extract 1 multiphylo with branch length representing amount of change
rate.tree <- rate.trees[[y]]
# get unstransformed tree edge lengths
if (names(rate.trees)[y] != "Between song variation")
edge_tab <- get_edge_table(phy) else
edge_tab <- get_edge_table(phy.btw.sng.vr)
# edge_tab1 <- get_edge_table(drop.tip(phy, tip = setdiff(phy$tip.label, rate.tree[[1]]$tip.label)))
node.list <- if(names(rate.trees)[y] != "Between song variation") all.nodes else all.nodes.bsv
# loop over each sub tree to get rates for each branch x tree
rates <- lapply(rate.tree, function(z){
# edge tab for transformed
edge_tab_rates <- get_edge_table(z)
# change rate column name
names(edge_tab_rates)[5] <- "rate.length"
# merge unstransformed and transformed
mrg_edge_tab <- merge(edge_tab, edge_tab_rates[, c("parent", "child", "rate.length")])
# calculate rate
mrg_edge_tab$rate <- mrg_edge_tab$rate.length / mrg_edge_tab$length
})
# put results in a matrix
rate.mat <- do.call(cbind, rates)
# make temporary edge table to get parent numbers
mrg_edge_tab_temp <- merge(edge_tab, get_edge_table(rate.tree[[1]])[, c("parent", "child", "chi.name")])
# include parent label
rate.mat <- cbind(mrg_edge_tab_temp$parent, rate.mat)
# get mean rate across trees for each clade
rates.by.clade <- lapply(1:length(node.list), function(w)
data.frame(clade = names(node.list)[w], rate = apply(rate.mat[rate.mat[,1] %in% node.list[[w]], ], 2, mean)))
rates.by.clade.df <- do.call(rbind, rates.by.clade)
rates.by.clade.df$variable <- names(rate.trees)[y]
return(rates.by.clade.df)
})
# put all together in a single data frame
post_rates <- do.call(rbind, post_rates_l)
post_rates$log.rate <- log(post_rates$rate + 1)
# save as RDS
saveRDS(post_rates, "./data/processed/posterior_evolutionary_rates_per_clade.RDS")
# read branchwise rates
trees <- list.files(path = "./output/bayestraits", full.names = TRUE, pattern = "\\.trees$")
rate.trees <- lapply(trees, read.nexus)
names(rate.trees) <- c("Acoustic space", "Between song variation", "Sequence complexity", "# of element types")
# get mean posterior rates per parameter
post_rates_l <- pblapply(1:length(trees), cl = detectCores() - 1, function(y){
# extract 1 multiphylo with branch length representing amount of change
rate.tree <- rate.trees[[y]]
# get unstransformed tree edge lengths
if (names(rate.trees)[y] != "Between song variation")
edge_tab <- get_edge_table(phy) else
edge_tab <- get_edge_table(phy.btw.sng.vr)
node.list <- if(names(rate.trees)[y] != "Between song variation") maj.3.nodes else maj.3.nodes.bsv
# loop over each sub tree to get rates for each branch x tree
rates <- lapply(rate.tree, function(z){
# edge tab for transformed
edge_tab_rates <- get_edge_table(z)
# change rate column name
names(edge_tab_rates)[5] <- "rate.length"
# merge unstransformed and transformed
mrg_edge_tab <- merge(edge_tab, edge_tab_rates[, c("parent", "child", "rate.length")])
# calculate rate
mrg_edge_tab$rate <- mrg_edge_tab$rate.length / mrg_edge_tab$length
})
# put results in a matrix
rate.mat <- do.call(cbind, rates)
# make temporary edge table to get parent numbers
mrg_edge_tab_temp <- merge(edge_tab, get_edge_table(rate.tree[[1]])[, c("parent", "child", "chi.name")])
# include parent label
rate.mat <- cbind(mrg_edge_tab_temp$parent, rate.mat)
# get mean rate across trees for each clade
rates.by.clade <- lapply(1:length(node.list), function(w)
data.frame(clade = names(node.list)[w], rate = apply(rate.mat[rate.mat[,1] %in% node.list[[w]], ], 2, mean)))
rates.by.clade.df <- do.call(rbind, rates.by.clade)
rates.by.clade.df$variable <- names(rate.trees)[y]
return(rates.by.clade.df)
})
# put all together in a single data frame
post_rates <- do.call(rbind, post_rates_l)
post_rates$log.rate <- log(post_rates$rate + 1)
# save as RDS
saveRDS(post_rates, "./data/processed/posterior_evolutionary_rates_per_clade_3_major_clades.RDS")
post_rates <- readRDS("./data/processed/posterior_evolutionary_rates_per_clade.RDS")
post_rates$maj.clade <- ifelse(post_rates$clade %in% c("Swifts", "Nightjars"), post_rates$clade, "Hummingbirds")
post_rates_3_clades <- readRDS("./data/processed/posterior_evolutionary_rates_per_clade_3_major_clades.RDS")
# for plot have clade as species
post_rates_3_clades$species <- post_rates_3_clades$clade
post_rates$species <- post_rates$clade
gg.rates <- lapply(unique(post_rates$variable), function(w){
# all clades
gg.post.rate <- gg.all.clade.collapse.tree
# get data subset
dat <- post_rates[post_rates$variable == w, ]
# keep only data within 95% quantile
dat.l <- lapply(unique(dat$clade), function(x){
X <- dat[dat$clade == x, ]
# if (x != "Nightjars")
# {
Z <- X$log.rate
class(Z) <- "mcmc"
hpd <- HPDinterval(Z)
Y <- X[X$log.rate > hpd[1] & X$log.rate < hpd[2], ]
# }
# else Y <- X
#
return(Y)
# if (x == "Nightjars" & X$variable[1] == "Between song variation") return(X) else
})
dat <- do.call(rbind, dat.l)
# add facet violin
gg.post.rate <- facet_plot(gg.post.rate, data = dat, geom = geom_violin, aes(x= log.rate, group = label, fill = maj.clade), panel = paste("Log", w, "rates")) +
scale_fill_manual(values = cols[c(5, 3, 8)]) +
theme(legend.position = "none") + geom_vline(xintercept = log_background_rates[w], lty = 2, size = 1.5, col = c("transparent", "black"))
# 3 major clades
gg.post.rate.3.maj.clades <- gg.major.clade.collapse.tree
# get data subset
dat <- post_rates_3_clades[post_rates_3_clades$variable == w, ]
# keep only data within 95% quantile
dat.l <- lapply(unique(dat$clade), function(x){
X <- dat[dat$clade == x, ]
try(Y <- X[X$log.rate > quantile(X$log.rate, 0.025) & X$log.rate < quantile(X$log.rate, 0.975), ], silent = TRUE)
if (x == "Nightjars" & X$variable[1] == "Between song variation") return(X) else
return(Y)
})
dat <- do.call(rbind, dat.l)
# add facet violin
gg.post.rate.3.maj.clades <- facet_plot(gg.post.rate.3.maj.clades, data = dat, geom = geom_violin, aes(x= log.rate, group = label, fill = label), panel = paste("Log", w, "rates")) +
scale_fill_manual(values = cols[c(8, 3, 5)]) +
theme(legend.position = "none") +
geom_vline(xintercept = log_background_rates[w], lty = 2, size = 1.5, col = c("transparent", "black"))
# # plot both together
cow_plt <- plot_grid(gg.post.rate.3.maj.clades, gg.post.rate, ncol = 2)
return(cow_plt)
})
names(gg.rates) <- unique(post_rates$variable)
gg.rates$`Sequence complexity`
gg.rates$`Acoustic space`
gg.rates$`Between song variation`
gg.rates$`# of element types`
Overall, evolutionary rates tend to be higher in hummingbirds, with some important differences between hummingbird sub-clades
Mountain gems show the highest evolutionary rates among all clades
Nightjars and swifts very similar except for “number of element types”
As a metric of phenotypic integration
(should we use phylogenetically control regressions?)
# proof of concept that trimming file share node labels
# ac_sp_tr <- rate.trees$`Acoustic space`[[1]]
#
# btwn_tr <- rate.trees$`Between song variation`[[1]]
#
# prn_ac_sp_tr <- drop.tip(ac_sp_tr, tip = setdiff(ac_sp_tr$tip.label, btwn_tr$tip.label))
#
# jpeg("trim_phylo.jpeg", width = 1000, height = 5000, pointsize = 20)
# par(mar = rep(0, 4), mfrow = c(1, 2))
# plot.phylo(btwn_tr, show.tip.label = FALSE, use.edge.length = FALSE)
# nodelabels()
#
# plot.phylo(prn_ac_sp_tr, show.tip.label = FALSE, use.edge.length = FALSE)
# nodelabels()
# dev.off()
# read branchwise rates
trees <- list.files(path = "./output/bayestraits", full.names = TRUE, pattern = "\\.trees$")
rate.trees <- lapply(trees, read.nexus)
names(rate.trees) <- c("Acoustic space", "Between song variation", "Sequence complexity", "# of element types")
rate.trees <- rate.trees[c(1, 3, 4, 2)]
# make combinations of 2 parameters at the time
grd_trees <- t(combn(names(rate.trees), 2))
# get get correlation between complexity
evo_cors_l <- pblapply(1:nrow(grd_trees), cl = detectCores() - 1, function(y){
# extract 1 bayes traits multiphylo with branch length representing amount of change
bt_tr1 <- rate.trees[[grd_trees[y, 1]]]
bt_tr2 <- rate.trees[[grd_trees[y, 2]]]
# prune tree if different ntips
if (Ntip(bt_tr1[[1]]) != Ntip(bt_tr2[[1]]))
{
bt_tr1 <- lapply(bt_tr1, drop.tip, tip = setdiff(bt_tr1[[1]]$tip.label, bt_tr2[[1]]$tip.label))
# get node list depending on number of species
node.list <- all.nodes.bsv
# get unstransformed tree edge lengths
edge_tab <- get_edge_table(phy.btw.sng.vr)
# edge_tab <- get_edge_table(drop.tip(phy, tip = setdiff(phy$tip.label, phy.btw.sng.vr$tip.label)))
} else { # if both trees have 318 species
# get unstransformed tree edge lengths
edge_tab <- get_edge_table(phy)
# node list
node.list <- all.nodes
}
# make edge rate table template
edge_tab_templ <- get_edge_table(bt_tr1[[1]])
# change rate column name
names(edge_tab_templ)[5] <- "delete"
# merge unstransformed and transformed
edge_tab_templ <- merge(edge_tab, edge_tab_templ[, c("parent", "child", "delete")])
edge_tab_templ$delete <- NULL
# loop over each sub tree to get rates for each branch x tree
rates_tr1 <- sapply(bt_tr1, function(z){
# edge tab for transformed
edge_tab_rates <- get_edge_table(z)
# change rate column name
names(edge_tab_rates)[5] <- "rate.length"
# merge unstransformed and transformed
mrg_edge_tab <- merge(edge_tab_templ, edge_tab_rates[, c("parent", "child", "rate.length")])
# calculate rate
mrg_edge_tab$rate <- mrg_edge_tab$rate.length / mrg_edge_tab$length
return(mrg_edge_tab$rate)
})
# rates second tree
rates_tr2 <- sapply(bt_tr2, function(z){
# edge tab for transformed
edge_tab_rates <- get_edge_table(z)
# change rate column name
names(edge_tab_rates)[5] <- "rate.length"
# merge unstransformed and transformed
mrg_edge_tab <- merge(edge_tab_templ, edge_tab_rates[, c("parent", "child", "rate.length")])
# calculate rate
mrg_edge_tab$rate <- mrg_edge_tab$rate.length / mrg_edge_tab$length
return(mrg_edge_tab$rate)
})
# sample 1000 tree values randomly for each tree
# ang get correlation of log rates
cors_clade_l <- lapply(1:length(node.list), function(w)
{
# select rates for each clade based on nodes
rt1 <- rates_tr1[edge_tab_templ$parent %in% node.list[[w]], ]
rt2 <- rates_tr2[edge_tab_templ$parent %in% node.list[[w]], ]
cors <- sapply(1:10000, function(x)
cor(log(rt1[, sample(1:ncol(rt1), 1), drop = TRUE] + 1), log(rt2[, sample(1:ncol(rt2), 1), drop = TRUE] + 1))
)
return(data.frame(clade = names(node.list)[w], cor = cors))
})
cors_clade <- do.call(rbind, cors_clade_l)
# names(cors_clade) <- names(node.list)
cors_df <- data.frame(clade = cors_clade$clade, param1 = grd_trees[y, 1], param2 = grd_trees[y, 2], cor = cors_clade$cor)
return(cors_df)
})
# put all together in a single data frame
evo_cors <- do.call(rbind, evo_cors_l)
# save as RDS
saveRDS(evo_cors, "./data/processed/posterior_evolutionary_rate_correlations.RDS")
evo_cors <- readRDS("./data/processed/posterior_evolutionary_rate_correlations.RDS")
evo_cors$variable.pair <- paste0(evo_cors$param1, "-", evo_cors$param2)
evo_cors_3_clades <- evo_cors[evo_cors$clade %in% c("Nightjars", "Swifts", "Hummingbirds"), ]
# remove hummingbirds
evo_cors <- evo_cors[evo_cors$clade != "Hummingbirds", ]
# for plot have clade as species
evo_cors_3_clades$species <- as.character(evo_cors_3_clades$clade)
evo_cors$species <- as.character(evo_cors$clade)
evo_cors$maj.clade <- as.character(ifelse(evo_cors$species %in% c("Nightjars", "Swifts"), evo_cors$species, "Hummingbirds"))
gg_cor_rates <- lapply(c(unique(evo_cors$variable.pair), "overall"), function(w){
# all clades
gg.cor.rate <- gg.all.clade.collapse.tree +
geom_vline(xintercept = 0, lty = 2, size = 1.5, col = c("transparent", "black"))
# get data subset
if (w != "overall")
dat <- evo_cors[evo_cors$variable.pair == w, ] else
dat <- evo_cors
# keep only data within 95% quantile
dat.l <- lapply(unique(dat$species), function(x){
X <- dat[dat$species == x, ]
Z <- X$cor
class(Z) <- "mcmc"
hpd <- HPDinterval(Z)
Y <- X[X$cor > hpd[1] & X$cor < hpd[2], ]
return(Y)
})
dat <- do.call(rbind, dat.l)
# add facet violin
gg.cor.rate <- facet_plot(gg.cor.rate, data = dat, geom = geom_violin, aes(x= cor, group = label, fill = maj.clade), panel = w) +
scale_fill_manual(values = cols[c(8, 3, 5)]) +
theme(legend.position = "none")
# 3 major clades
gg.cor.rate.3.maj.clades <- gg.major.clade.collapse.tree + geom_vline(xintercept = 0, lty = 2, size = 1.5, col = c("transparent", "black"))
# get data subset
if (w != "overall")
dat <- evo_cors_3_clades[evo_cors_3_clades$variable.pair == w, ] else
dat <- evo_cors_3_clades
# keep only data within 95% quantile
dat.l <- lapply(unique(dat$clade), function(x){
X <- dat[dat$clade == x, ]
Z <- X$cor
class(Z) <- "mcmc"
hpd <- HPDinterval(Z)
Y <- X[X$cor > hpd[1] & X$cor < hpd[2], ]
return(Y)
})
dat <- do.call(rbind, dat.l)
# add facet violin
gg.cor.rate.3.maj.clades <-
facet_plot(gg.cor.rate.3.maj.clades, data = dat, geom = geom_violin, aes(x= cor, group = label, fill = label), panel = w) +
scale_fill_manual(values = cols[c(8, 3, 5)]) +
theme(legend.position = "none")
# # plot both together
cow_plt <- plot_grid(gg.cor.rate.3.maj.clades, gg.cor.rate, ncol = 2)
return(cow_plt)
})
names(gg_cor_rates) <- c(unique(evo_cors$variable.pair), "overall")
gg_cor_rates$`Acoustic space-Sequence complexity`
gg_cor_rates$`Acoustic space-# of element types`
gg_cor_rates$`Acoustic space-Between song variation`
gg_cor_rates$`Sequence complexity-# of element types`
gg_cor_rates$`Sequence complexity-Between song variation`
gg_cor_rates$`# of element types-Between song variation`
gg_cor_rates$overall
Nightjars show low integration across all parameter pairs
Hummingbirds and swifts show similar levels of integration and higher than nightjars
However, only hummingbirds show “significant” integration (always higher than 0) for all pairwise comparisons as well as overall integration
Session information
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=es_ES.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=es_ES.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=es_ES.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] coda_0.19-3 cowplot_1.0.0 pbapply_1.4-2 phangorn_2.5.5
## [5] leaflet_2.0.3 ggplot2_3.3.0 readxl_1.3.1 viridis_0.5.1
## [9] viridisLite_0.3.0 ape_5.3 ggtree_2.1.3
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.3 lattice_0.20-41 tidyr_1.0.0
## [4] assertthat_0.2.1 zeallot_0.1.0 digest_0.6.22
## [7] mime_0.9 R6_2.4.1 cellranger_1.1.0
## [10] backports_1.1.5 evaluate_0.14 pillar_1.4.2
## [13] rlang_0.4.1 lazyeval_0.2.2 Matrix_1.2-18
## [16] rmarkdown_1.17 labeling_0.3 stringr_1.4.0
## [19] htmlwidgets_1.5.1 igraph_1.2.4.1 munsell_0.5.0
## [22] shiny_1.4.0 compiler_3.6.1 httpuv_1.5.2
## [25] xfun_0.12 pkgconfig_2.0.3 htmltools_0.4.0
## [28] tidyselect_0.2.5 tibble_2.1.3 gridExtra_2.3
## [31] quadprog_1.5-8 crayon_1.3.4 dplyr_0.8.3
## [34] withr_2.1.2 later_1.0.0 grid_3.6.1
## [37] nlme_3.1-142 jsonlite_1.6 xtable_1.8-4
## [40] gtable_0.3.0 lifecycle_0.1.0 magrittr_1.5
## [43] scales_1.0.0 tidytree_0.3.2 stringi_1.4.6
## [46] promises_1.1.0 rvcheck_0.1.8 vctrs_0.2.0
## [49] fastmatch_1.1-0 RColorBrewer_1.1-2 tools_3.6.1
## [52] treeio_1.10.0 glue_1.3.1 purrr_0.3.3
## [55] crosstalk_1.0.0 fastmap_1.0.1 yaml_2.2.1
## [58] colorspace_1.4-1 BiocManager_1.30.10 knitr_1.28