## vector with package names
x <- c("pbapply", "viridis", "knitr", "kableExtra", "ggplot2", "ape")

aa <- lapply(x, function(y) {
  
  # check if installed, if not then install 
  if (!y %in% installed.packages()[,"Package"]) 
    install.packages(y) 

  # load package
  try(require(y, character.only = T), silent = T)
})
knitr::opts_knit$set(root.dir = normalizePath(".."))

knitr::opts_chunk$set(dpi = 38, fig.width = 18, fig.height = 10, echo = TRUE) 

options(knitr.kable.NA = '')

source('~/Dropbox/Projects/lbh_cultural_evolution/source/custom_treespace.R')

theme_set(theme_classic(base_size = 22))
rb.trees <- readRDS("./output/revbayes_output_in_single_R_object.RDS")

rb.trees <- rb.trees[grep("run_", names(rb.trees), invert = TRUE, value = TRUE)]

trees_diags <- data.frame(model = names(rb.trees))

# get lek name
trees_diags$lek <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 1)

# substitution model
trees_diags$subs <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 2)

# period
trees_diags$period <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 3)

# and which fossils were used
trees_diags$fossils <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 4)

trees_diags$align <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 5)

trees_diags$n_trees <- sapply(rb.trees, function(x) length(x$trees))

# Number of models by iterations and lek
kbl <- knitr::kable(as.matrix(table(trees_diags$n_trees, trees_diags$lek)), caption = "Number of models with a specific number of trees by lek")

kableExtra::kable_styling(kbl)

Tree spaces by leks

All fossils using pairwise shared tips

  • 100 trees for each model evenly spaced along the chain
  • Some trees were removed if NAs were produced when comparing topologies (i.e. non-comparable topologies)
#selected leks
sel_leks <- c("SUR", "CCE", "HC1", "BR1", "TR1")

tree_names <- grep("early", names(rb.trees), invert = TRUE, value = TRUE)

pnts_lks <- lapply(sel_leks, function(i) {
  
  print(i)
  lek_trees <- grep(i, tree_names, value = TRUE)
  
  multi_list <- lapply(lek_trees, function(x) read.tree(file.path("./output/most_recent_revbayes_models/", x), keep.multi = TRUE))
  
  names(multi_list) <- sapply(basename(lek_trees), function(x) paste(strsplit(x, "_", fixed = TRUE)[[1]][1:5], collapse = "_"))
  
  pnts <- try(custom_treespace(multi_list, n.points = 100, method = "RF", cl = 10), silent = TRUE)

  if (!is(pnts, "try-error")) 
    return(pnts) else
      return(NULL)
})

names(pnts_lks) <- sel_leks

sapply(pnts_lks, is.null)

saveRDS(pnts_lks, file = "./output/topology/tree_distance_all_fossils_pairwise_shared.RDS")

New and old data sets

pnts_lks <- readRDS("./output/topology/tree_distance_all_fossils_pairwise_shared.RDS")

ggs <- lapply(pnts_lks, function(X) {

    ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") + 
      ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain)))) 
    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

Old data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  
  if (nrow(X) > 0)
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1
## NULL
## 
## $TR1
## NULL

New data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

Clumping molecular clocks

Old data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
    if (nrow(X) > 0)
    ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1
## NULL
## 
## $TR1
## NULL

New data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
  ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

Early fossils using only pairwise shared tips

#selected leks
sel_leks <- c("SUR", "CCE", "HC1", "BR1", "TR1")
# sel_leks <- c("SUR", "TR1")

tree_names <- grep("early", names(rb.trees), value = TRUE)

pnts_lks <- lapply(sel_leks, function(i) {
  
  print(i)
  lek_trees <- grep(i, tree_names, value = TRUE)
  
  multi_list <- lapply(lek_trees, function(x) read.tree(file.path("./output/most_recent_revbayes_models/", x), keep.multi = TRUE))
  
  names(multi_list) <- sapply(basename(lek_trees), function(x) paste(strsplit(x, "_", fixed = TRUE)[[1]][1:5], collapse = "_"))
  
  pnts <- try(custom_treespace(multi_list, n.points = 100, method = "RF", cl = 10), silent = TRUE)

  if (!is(pnts, "try-error")) 
    return(pnts) else
      return(NULL)
})

names(pnts_lks) <- sel_leks

sapply(pnts_lks, is.null)

saveRDS(pnts_lks, file = "./output/topology/tree_distance_early_fossils_pairwise_shared.RDS")

New and old data sets

pnts_lks <- readRDS("./output/topology/tree_distance_early_fossils_pairwise_shared.RDS")

ggs <- lapply(pnts_lks, function(X) {

    ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") + 
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain)))) 

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

New data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
  })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

Old data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  
  if (nrow(X) > 0)
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = viridis(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
  })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1
## NULL
## 
## $TR1
## NULL

Clumping molecular clocks

Old data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
    if (nrow(X) > 0)
    ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1
## NULL
## 
## $TR1
## NULL

New data set

ggs <- lapply(pnts_lks, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
  ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))

    })

ggs
## $SUR

## 
## $CCE

## 
## $HC1

## 
## $BR1

## 
## $TR1

R session information

## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=pt_PT.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_PT.UTF-8    
##  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_PT.UTF-8   
##  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ape_5.4-1         ggplot2_3.3.2     kableExtra_1.3.1  knitr_1.30       
## [5] viridis_0.5.1     viridisLite_0.3.0 pbapply_1.4-3    
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.5       pillar_1.4.6     compiler_4.0.3   tools_4.0.3     
##  [5] digest_0.6.27    nlme_3.1-149     lattice_0.20-41  evaluate_0.14   
##  [9] lifecycle_0.2.0  tibble_3.0.4     gtable_0.3.0     pkgconfig_2.0.3 
## [13] rlang_0.4.9      rstudioapi_0.11  yaml_2.2.1       parallel_4.0.3  
## [17] xfun_0.19        gridExtra_2.3    withr_2.3.0      stringr_1.4.0   
## [21] dplyr_1.0.2      httr_1.4.2       xml2_1.3.2       generics_0.1.0  
## [25] vctrs_0.3.6      webshot_0.5.2    grid_4.0.3       tidyselect_1.1.0
## [29] glue_1.4.2       R6_2.5.0         rmarkdown_2.4    farver_2.0.3    
## [33] purrr_0.3.4      magrittr_2.0.1   scales_1.1.1     ellipsis_0.3.1  
## [37] htmltools_0.5.0  rvest_0.3.6      colorspace_1.4-1 labeling_0.3    
## [41] stringi_1.5.3    munsell_0.5.0    crayon_1.3.4