Cultural evolution tree analysis

Author
Published

October 18, 2024

Code
# print link to github repo if any
if (file.exists("./.git/config")){
  config <- readLines("./.git/config")
  url <- grep("url",  config, value = TRUE)
  url <- gsub("\\turl = |.git$", "", url)
  cat("\nSource code and data found at [", url, "](", url, ")", sep = "")
  }

Source code and data found at

Code
# options to customize chunk outputs
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 65), 
  tidy = TRUE,
  message = FALSE
 )

Purpose

  • The first goal of this report

  • The second goal of this report

Analysis flowchart

flowchart
  A[Read data] --> B(Format data) 
  B --> C(Graphs)
  C --> D(Statistical analysis)
  D --> E(Model summary) 

style A fill:#44015466
style B fill:#3E4A894D
style C fill:#26828E4D
style D fill:#6DCD594D

Load packages

1 Read data

Code
# read revbayes output
rb.trees <- readRDS("~/Dropbox/Projects/lbh_cultural_evolution_revBayes/output/revbayes_output_in_single_R_object.RDS")

selected_trees <- names(rb.trees)[(grepl("prank", names(rb.trees)) &
    grepl("all", names(rb.trees)) & !grepl("run", names(rb.trees)) &
    grepl("Uexp", names(rb.trees)) & grepl("old", names(rb.trees))) |
    grepl("TR1_prank_new_all_Uexp_586149.trees", names(rb.trees))]

rb.trees <- rb.trees[selected_trees]

2 Extract 100 trees with highest posterior probability

Code
# subsample 100 trees per lek those with highest posterior
# probability
rb.trees <- lapply(rb.trees, function(x) {

    phylos <- x$trees[order(x$ptable$Posterior, decreasing = TRUE)[1:100]]

    return(phylos)
})

3 Get consensus tree per lek (not used yet)

Code
rb.trees <- lapply(rb.trees, function(x) {

    # get consensus tree
    consensus_tree <- phangorn::mcc(x)

    return(consensus_tree)
})

4 Combine into (several) single trees

Code
# get matrix with index of single trees per lek to use in the
# combined tree
tree_indices <- replicate(n = length(rb.trees), sample(1:100))

combined_trees <- lapply(seq_len(nrow(tree_indices)), function(x) {

    # get trees
    trees <- lapply(seq_len(ncol(tree_indices)), function(y) rb.trees[[y]][[tree_indices[x,
        y]]])

    # get root and tip age for each
    root_age <- sapply(trees, function(x) max(node.depth.edgelength(x)))
    tip_year <- sapply(trees, function(y) max(sapply(strsplit(y$tip.label,
        split = "-"), function(x) as.numeric(x[3]))))

    tip_year_diff <- max(tip_year) - tip_year
    root_age_diff <- max(root_age) - root_age

    length_to_add <- root_age_diff - tip_year_diff

    length_to_add <- length_to_add + abs(min(length_to_add))

    # length_to_add <- length_to_add + 15
    length_to_add[length_to_add == 0] <- 0.1

    # solve multifurcation
    trees <- lapply(trees, multi2di)

    # root trees
    trees <- lapply(seq_along(trees), function(z) {
        y <- trees[[z]]
        y <- root(y, node = which.max(node.depth(y)))
        y$root.edge <- length_to_add[z]
        return(y)
    })

    # combine trees
    combined_tree <- bind.tree(trees[[1]], trees[[2]], position = trees[[1]]$root.edge)

    for (i in 3:length(trees)) {
        combined_tree <- bind.tree(trees[[i]], combined_tree, position = trees[[i]]$root.edge)
    }

    return(combined_tree)
})

class(combined_trees) <- "multiPhylo"

saveRDS(combined_trees, "./data/processed/100_combined_trees_pranK_all_uexp_old.RDS")

5 Plot combined trees (a few of them)

Code
combined_trees <- readRDS("./data/processed/100_combined_trees_pranK_all_uexp_old.RDS")

for (i in sample(1:100, 20)) {
    # plot.phylo(combined_trees[[i]], cex = 0.4, root.edge =
    # TRUE, main = i)

    # Add the time scale axisPhylo(backward = FALSE)
    combined_tree <- combined_trees[[i]]

    # make it ultrametric
    combined_tree <- multi2di(combined_tree, resolve.root = TRUE)

    # is.binary(combined_tree) combined_tree <-
    # chronos(combined_tree, lambda = 0.5, quiet = TRUE)
    combined_tree <- chronopl(combined_tree, lambda = 0.5)

    groupInfo <- sapply(strsplit(combined_tree$tip.label, split = "-"),
        "[[", 1)

    g <- split(combined_tree$tip.label, groupInfo)

    p <- ggtree(combined_tree)

    clades <- sapply(g, function(n) MRCA(p, n))

    p <- groupClade(p, clades, group_name = "subtree") + aes(color = subtree) +
        scale_color_viridis_d(option = "G", begin = 0.1, end = 0.9,
            guide = "none")

    for (i in seq_len(clades)) p <- p + geom_cladelabel(clades[i],
        names(clades)[i], barsize = 2, angle = 90, hjust = 0.5, fontsize = 3,
        color = mako(4, begin = 0.1, end = 0.9)[i])

    print(p)
}

Takeaways


Session information

R version 4.4.1 (2024-06-14)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.4 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Costa_Rica
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] RRphylo_2.8.1     emmeans_1.10.3    ggtree_3.12.0     phangorn_2.11.1  
 [5] rwty_1.0.2        ggridges_0.5.6    cowplot_1.1.3     brmsish_1.0.0    
 [9] brms_2.22.0       Rcpp_1.0.13       ape_5.8           ggplot2_3.5.1    
[13] kableExtra_1.4.0  knitr_1.48        viridis_0.6.5     viridisLite_0.4.2
[17] pbapply_1.7-2    

loaded via a namespace (and not attached):
  [1] mnormt_2.1.1            formatR_1.14            gridExtra_2.3          
  [4] remotes_2.5.0           sandwich_3.1-0          rlang_1.1.4            
  [7] magrittr_2.0.3          multcomp_1.4-25         matrixStats_1.4.1      
 [10] compiler_4.4.1          loo_2.8.0               maps_3.4.2             
 [13] systemfonts_1.1.0       vctrs_0.6.5             reshape2_1.4.4         
 [16] combinat_0.0-8          quadprog_1.5-8          stringr_1.5.1          
 [19] pkgconfig_2.0.3         arrayhelpers_1.1-0      crayon_1.5.3           
 [22] fastmap_1.2.0           backports_1.5.0         labeling_0.4.3         
 [25] utf8_1.2.4              rmarkdown_2.28          purrr_1.0.2            
 [28] xfun_0.48               cachem_1.1.0            clusterGeneration_1.3.8
 [31] aplot_0.2.3             jsonlite_1.8.9          tidybayes_3.0.7        
 [34] parallel_4.4.1          R6_2.5.1                stringi_1.8.4          
 [37] RColorBrewer_1.1-3      GGally_2.2.1            numDeriv_2016.8-1.1    
 [40] estimability_1.5.1      iterators_1.0.14        optimParallel_1.0-2    
 [43] zoo_1.8-12              bayesplot_1.11.1        Matrix_1.7-0           
 [46] splines_4.4.1           igraph_2.0.3            tidyselect_1.2.1       
 [49] rstudioapi_0.16.0       abind_1.4-8             yaml_2.3.10            
 [52] doParallel_1.0.17       codetools_0.2-20        lattice_0.22-6         
 [55] tibble_3.2.1            plyr_1.8.9              treeio_1.26.0          
 [58] withr_3.0.1             bridgesampling_1.1-2    posterior_1.6.0        
 [61] coda_0.19-4.1           evaluate_1.0.1          phytools_2.3-0         
 [64] gridGraphics_0.5-1      survival_3.7-0          sketchy_1.0.3          
 [67] ggstats_0.7.0           RcppParallel_5.1.9      ggdist_3.3.2           
 [70] xml2_1.3.6              pillar_1.9.0            tensorA_0.36.2.1       
 [73] packrat_0.9.2           stats4_4.4.1            foreach_1.5.2          
 [76] checkmate_2.3.2         ggfun_0.1.5             distributional_0.5.0   
 [79] generics_0.1.3          rstantools_2.4.0        munsell_0.5.1          
 [82] scales_1.3.0            tidytree_0.4.6          xtable_1.8-4           
 [85] glue_1.8.0              scatterplot3d_0.3-44    lazyeval_0.2.2         
 [88] tools_4.4.1             xaringanExtra_0.8.0     fs_1.6.4               
 [91] mvtnorm_1.3-1           fastmatch_1.1-4         grid_4.4.1             
 [94] tidyr_1.3.1             colorspace_2.1-1        patchwork_1.3.0        
 [97] nlme_3.1-165            cli_3.6.3               DEoptim_2.2-8          
[100] expm_0.999-9            fansi_1.0.6             svUnit_1.0.6           
[103] svglite_2.1.3           Brobdingnag_1.2-9       ggdendro_0.2.0         
[106] dplyr_1.1.4             gtable_0.3.5            yulab.utils_0.1.4      
[109] digest_0.6.37           ggplotify_0.1.2         TH.data_1.1-2          
[112] farver_2.1.2            htmlwidgets_1.6.4       memoise_2.0.1          
[115] htmltools_0.5.8.1       lifecycle_1.0.4         MASS_7.3-61