Tree topology diagnostics

Long-billed hermit song cultural evolution

Author

Marcelo Araya-Salas PhD

Published

October 28, 2024

Source code and data found at https://github.com/maRce10/lbh_cultural_evolution_revBayes

Load packages

Custom functions and global options

Code

knitr::opts_chunk$set(out.width = "100%", out.height = "100%", echo = TRUE) 

source('~/Dropbox/Projects/lbh_cultural_evolution_revBayes/source/custom_treespace.R')

theme_set(theme_classic(base_size = 10))

fill_color <- viridis::mako(10, alpha = 0.5)[7]

iterations <- 5000
chains <- 4
priors <- c(set_prior("normal(0, 1.5)", class = "Intercept"), set_prior("normal(0, 1.5)", class = "b"))

# to create several posterior predictive check plots out of a brms fit
custom_ppc1 <- function(fit, group = NULL, ndraws = 30) {
  
    ppc_dens <-
        pp_check(fit,
                 ndraws = ndraws,
                 type = 'dens_overlay_grouped',
                 group = group)  # shows dens_overlay plot by default
    pp_mean <-
        pp_check(
            fit,
            type = "stat_grouped",
            stat = "mean",
            group = group,
            ndraws = ndraws
        )
    pp_scat <-
        pp_check(fit,
                 type = "scatter_avg_grouped",
                 group = group,
                 ndraws = ndraws)
    pp_stat2 <- pp_check(fit, type = "stat_2d", ndraws = ndraws)
    pp_loo <- pp_check(fit, type = "loo_pit_qq", ndraws = ndraws)
    pp_error <-
        pp_check(fit,
                 type = "error_scatter_avg_grouped",
                 ndraws = ndraws,
                 group = group)
    plot_l <-
        list(ppc_dens, pp_mean, pp_scat, pp_error,  pp_stat2, pp_loo)
    
    plot_l[c(1, 3:length(plot_l))] <-
        lapply(plot_l[c(1, 3:length(plot_l))], function(x)
            x  + scale_color_viridis_d(
                begin = 0.1,
                end = 0.8,
                alpha = 0.5
            ) + scale_fill_viridis_d(
                begin = 0.1,
                end = 0.8,
                alpha = 0.5
            ) + theme_classic())     

    print(plot_grid(plotlist = plot_l, ncol = 2))
}

custom_ppc <- function(...) suppressMessages(custom_ppc1(...))

Read data

Code

rb.trees <- readRDS("./output/revbayes_output_in_single_R_object.RDS")

rb.trees <- rb.trees[grep("run_", names(rb.trees), invert = TRUE, value = TRUE)]

trees_diags <- data.frame(model = names(rb.trees))

# get lek name
trees_diags$lek <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 1)

# substitution model
trees_diags$subs <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 2)

# period
trees_diags$period <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 3)

# and which fossils were used
trees_diags$fossils <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 4)

trees_diags$align <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 5)

trees_diags$n_trees <- sapply(rb.trees, function(x) length(x$trees))

# Number of models by iterations and lek
kbl <-
    knitr::kable(as.matrix(table(trees_diags$lek, trees_diags$n_trees)), caption = "Number of models with a specific number of trees by lek")

kableExtra::kable_styling(kbl)

# Number of models by iterations and lek
kbl <-
    knitr::kable(as.matrix(table(trees_diags$lek, trees_diags$subs)), caption = "Number of models with a specific number of trees by lek")

kableExtra::kable_styling(kbl)

1 Topological space estimation

Pairwise topological distance between all trees for each lek
100 trees for each model evenly spaced along the chain
Comparing based on shared song types (pairwise shared tips)
Some trees were removed if NAs were produced when comparing topologies (i.e. non-comparable topologies)

1.1 All fossils

Code

#selected leks
sel_leks <- c("SUR", "CCE", "HC1", "BR1", "TR1")

tree_names <- grep("early", names(rb.trees), invert = TRUE, value = TRUE)

pnts_lks_all <- lapply(sel_leks, function(i) {
  
  print(i)
  lek_trees <- grep(i, tree_names, value = TRUE)
  
  multi_list <- lapply(lek_trees, function(x) read.tree(file.path("./output/most_recent_revbayes_models/", x), keep.multi = TRUE))
  
  names(multi_list) <- sapply(basename(lek_trees), function(x) paste(strsplit(x, "_", fixed = TRUE)[[1]][1:5], collapse = "_"))
  
  pnts <- try(custom_treespace(multi_list, n.points = 100, method = "RF", cl = 10), silent = TRUE)

  if (!is(pnts, "try-error")) 
    return(pnts) else
      return(NULL)
})

names(pnts_lks_all) <- sel_leks

sapply(pnts_lks_all, is.null)

saveRDS(pnts_lks_all, file = "./output/topology/tree_distance_all_fossils_pairwise_shared.RDS")

1.2 Early fossils

Code

#selected leks
sel_leks <- c("SUR", "CCE", "HC1", "BR1", "TR1")
# sel_leks <- c("SUR", "TR1")

tree_names <- grep("early", names(rb.trees), value = TRUE)

pnts_lks_early <- lapply(sel_leks, function(i) {
  
  print(i)
  lek_trees <- grep(i, tree_names, value = TRUE)
  
  multi_list <- lapply(lek_trees, function(x) read.tree(file.path("./output/most_recent_revbayes_models/", x), keep.multi = TRUE))
  
  names(multi_list) <- sapply(basename(lek_trees), function(x) paste(strsplit(x, "_", fixed = TRUE)[[1]][1:5], collapse = "_"))
  
  pnts <- try(custom_treespace(multi_list, n.points = 100, method = "RF", cl = 10), silent = TRUE)

  if (!is(pnts, "try-error")) 
    return(pnts) else
      return(NULL)
})

names(pnts_lks_early) <- sel_leks

sapply(pnts_lks_early, is.null)

saveRDS(pnts_lks_early, file = "./output/topology/tree_distance_early_fossils_pairwise_shared.RDS")

2 Plot topological spaces

2.1 All fossils

2.1.1 New and old data sets

Code

pnts_lks_all <- readRDS("./output/topology/tree_distance_all_fossils_pairwise_shared.RDS")

ggs <- lapply(pnts_lks_all, function(X) {

    ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, ncol = 2) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") + 
      ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain)))) 
    })

ggs

$SUR


$CCE


$HC1


$BR1


$TR1

2.1.2 Old data set

Code

ggs <- lapply(pnts_lks_all, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  
  if (nrow(X) > 0)
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
    })

ggs

$SUR


$CCE


$HC1


$BR1
NULL

$TR1
NULL

2.1.3 New data set

Code

ggs <- lapply(pnts_lks_all, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs

$SUR


$CCE


$HC1


$BR1


$TR1

2.1.3.1 Clumping molecular clocks

2.1.3.1.1 Old data set

Code

ggs <- lapply(pnts_lks_all, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
    if (nrow(X) > 0)
    ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(option = "G", begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs

$SUR


$CCE


$HC1


$BR1
NULL

$TR1
NULL

2.1.3.1.2 New data set

Code

ggs <- lapply(pnts_lks_all, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
  ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(option = "G", begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs

$SUR


$CCE


$HC1


$BR1


$TR1

2.2 Early fossils

2.2.1 New and old data sets

Code

pnts_lks_early <- readRDS("./output/topology/tree_distance_early_fossils_pairwise_shared.RDS")

ggs <- lapply(pnts_lks_early, function(X) {

    ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") + 
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain)))) 

    })

ggs

$SUR


$CCE


$HC1


$BR1


$TR1

2.2.2 New data set

Code

ggs <- lapply(pnts_lks_early, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
  })

ggs

$SUR


$CCE


$HC1


$BR1


$TR1

2.2.3 Old data set

Code

ggs <- lapply(pnts_lks_early, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  
  if (nrow(X) > 0)
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
  })

ggs

$SUR


$CCE


$HC1


$BR1
NULL

$TR1
NULL

2.2.4 Clumping molecular clocks

2.2.4.1 Old data set

Code

ggs <- lapply(pnts_lks_early, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
    if (nrow(X) > 0)
    ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(option = "G", begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))

    })

ggs

$SUR


$CCE


$HC1


$BR1
NULL

$TR1
NULL

2.2.4.2 New data set

Code

ggs <- lapply(pnts_lks_early, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
  ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(option = "G", begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))

    })

ggs

$SUR


$CCE


$HC1


$BR1


$TR1

3 Topological space size

3.1 Observations per lek and alignment

All fossils

Code

# size

out <- lapply(pnts_lks_all, function(x){
  
    Y <- space_size(formula = chain ~ x + y, data = x, pb = FALSE, method = "density")
  Y$mean.c.size <- Y$size# - mean(Y$size)
 
  return(Y)  
})

topo_size_all <- do.call(rbind, out)

topo_size_all$alignment <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_size_all$data.set <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_size_all$fossils <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_size_all$models <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])

topo_size_all$lek <- substr(rownames(topo_size_all), 0 , 3)

tab <- as.data.frame(table(topo_size_all$lek, topo_size_all$alignment))
names(tab) <- c("lek", "model", "count")

tab

lek	model	count
BR1	all.equal	2
CCE	all.equal	4
HC1	all.equal	4
SUR	all.equal	4
TR1	all.equal	2
BR1	optimal	2
CCE	optimal	4
HC1	optimal	4
SUR	optimal	4
TR1	optimal	2
BR1	prank	2
CCE	prank	4
HC1	prank	4
SUR	prank	4
TR1	prank	2

Early fossils

Code

out <- lapply(pnts_lks_early, function(x){
  
    Y <- space_size(formula = chain ~ x + y, data = x, pb = FALSE, method = "density")
  Y$mean.c.size <- Y$size# - mean(Y$size)
  # Y$size <- Y$size / max(Y$size)
  
  return(Y)  
})

topo_size_early <- do.call(rbind, out)

topo_size_early$alignment <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_size_early$data.set <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_size_early$fossils <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_size_early$models <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])

topo_size_early$lek <- substr(rownames(topo_size_early), 0 , 3)

tab <- as.data.frame(table(topo_size_early$lek, topo_size_early$alignment))
names(tab) <- c("lek", "model", "count")

tab

lek	model	count
BR1	all.equal	2
CCE	all.equal	4
HC1	all.equal	4
SUR	all.equal	4
TR1	all.equal	2
BR1	optimal	2
CCE	optimal	4
HC1	optimal	4
SUR	optimal	4
TR1	optimal	2
BR1	prank	2
CCE	prank	4
HC1	prank	4
SUR	prank	4
TR1	prank	2

3.2 Plot size by treatment

Code

topo_size_all$fossils <- "all"
topo_size_early$fossils <- "early"

shared_columns <- intersect(names(topo_size_all), names(topo_size_early))

topo_size <- rbind(topo_size_all[, shared_columns], topo_size_early[, shared_columns])

# raincloud plot:
ggplot(topo_size,
       aes(x = alignment, y = size, fill = alignment)) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
           scale_x_discrete(labels = c(
        "natural" = "Natural",
        "artificial" = "Artificial",
        "simulated" = "Simulated"
      )) +
      theme(legend.position = "none") +
      facet_wrap(~ data.set + models + fossils, ncol = 2) +
  labs(x = "Alignment", y = "Topologic space dissimilarity")

3.3 Stats

3.3.1 Size of the topological space

Code

topo_size$alignment <- factor(topo_size$alignment, levels = c("all.equal", "optimal", "prank"))

topo_size$data.set <- factor(topo_size$data.set, levels = c("new", "old"))

topo_size$models <- factor(topo_size$models, levels = c("Uexp", "global"))

topo_size$fossils <- factor(topo_size$fossils, levels = c("early", "all"))

# weakly informative priors
priors <- c(
  prior(normal(0, 5), class = "Intercept"),
  prior(normal(0, 2), class = "b"),
  prior(exponential(1), class = "sd")
)

mod <- brm(
  formula = size ~ alignment + data.set + models + fossils + (1|lek), 
  data = topo_size,
  iter = iterations,
  family = gaussian(),
  chains = chains,
  cores = chains,
  prior = priors,
  save_pars = save_pars(all = TRUE),
   control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/regression_results_topological_size",
  file_refit = "always")

mod <- add_criterion(mod, criterion = c("loo"))

# weakly informative priors
priors <- c(
  prior(normal(0, 5), class = "Intercept"),
  prior(normal(0, 2), class = "b"),
  prior(exponential(1), class = "sd")
)

sub_mod <- brm(
  formula = size ~ data.set + fossils + (1|lek), 
  data = topo_size,
  iter = iterations,
  family = gaussian(),
  chains = chains,
  cores = chains,
  prior = priors,
  save_pars = save_pars(all = TRUE),
   control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/regression_results_topological_size_submodel",
  file_refit = "always")

sub_mod <- add_criterion(sub_mod, criterion = c("loo"))


# weakly informative priors
priors <- c(
  prior(normal(0, 5), class = "Intercept"),
  prior(exponential(1), class = "sd")
)

null_mod <- brm(
  formula = size ~ 1 + (1|lek), 
  data = topo_size,
  iter = iterations,
  family = gaussian(),
  chains = chains,
  cores = chains,
  prior = priors,
  save_pars = save_pars(all = TRUE),
   control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/null_regression_results_topological_size",
  file_refit = "always"
)

null_mod <- add_criterion(null_mod, criterion = c("loo"))

Model selection:

Code

mod <- readRDS("./data/processed/regression_results_topological_size.rds")

sub_mod <- readRDS("./data/processed/regression_results_topological_size_submodel.rds")

null_mod <- readRDS("./data/processed/null_regression_results_topological_size.rds")

loo_diff <- loo::loo_compare(mod, sub_mod, null_mod)

as.data.frame(loo_diff[,1:2], row.names = c("model", "sub_model", "null model"))

	elpd_diff	se_diff
model	0.0000000	0.000000
sub_model	-0.4158121	1.973736
null model	-24.8684153	4.150403

Code

looic_df <- as.data.frame(rbind(loo(mod)$estimates[3, ], loo(sub_mod)$estimates[3, ], loo(null_mod)$estimates[3, ]),row.names = c("model", "sub_model", "null model"))

names(looic_df)[1] <- "LOOIC"

looic_df$delta <- looic_df$LOOIC - min(looic_df$LOOIC)

looic_df

	LOOIC	SE	delta
model	-488.8457	32.21383	0.8316242
sub_model	-489.6774	34.43142	0.0000000
null model	-439.9405	32.88068	49.7368306

Bayes factor:

Code

# Calculate Bayes factor
bf <- bayes_factor(mod, null_mod)

Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4

Code

bf$bf

[1] 0.05251836

Code

sbf <- bayes_factor(sub_mod, null_mod)

Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6

Code

sbf$bf

[1] 16580585

Code

extended_summary(fit = mod,
    n.posterior = 1000,
    fill =  mako(10)[7],
    trace.palette = viridis,
    remove.intercepts = TRUE,
    highlight = TRUE,
    trace = TRUE,
    return = FALSE, 
    print.name = FALSE, 
    spread.type = "HPDI"
)

	priors	formula	iterations	chains	thinning	warmup	diverg_transitions	rhats > 1.05	min_bulk_ESS	min_tail_ESS	seed
1	b-normal(0, 2) Intercept-normal(0, 5) sd-exponential(1) sigma-student_t(3, 0, 2.5)	size ~ alignment + data.set + models + fossils + (1 \| lek)	5000	4	1	2500	1 (1e-04%)	0	8279.233	7138.681	316515948

	Estimate	l-95% CI	u-95% CI	Rhat	Bulk_ESS	Tail_ESS
b_alignmentoptimal	0.004	-0.005	0.012	1	8362.105	7450.773
b_alignmentprank	-0.005	-0.014	0.003	1	8279.233	7251.602
b_data.setold	0.031	0.023	0.040	1	9693.843	7392.857
b_modelsglobal	-0.002	-0.009	0.005	1.001	11404.147	7138.681
b_fossilsall	0.012	0.005	0.019	1	10432.653	7270.792

Contrasts for alignment strategies:

Code

# contrasts
contrasts(
    fit = mod,
    sort.levels = c("prank", "optimal", "all.equal"),
    predictor = "alignment",
    n.posterior = 2000,
    level.sep = " VS ",
    html.table = TRUE,
    plot = TRUE,
    highlight = TRUE,
    fill = mako(10)[7]
)

	Contrasts	Estimate	Est.Error	l-95% CI	u-95% CI
1	prank VS optimal	-0.009	0.005	-0.018	0.000
2	prank VS all.equal	-0.005	0.004	-0.014	0.003
3	optimal VS all.equal	0.004	0.004	-0.005	0.013

Code

custom_ppc(fit = mod, group = "alignment")

4 Topological space dissimilarity

4.1 Observations per lek and alignment

All fossils

Code

# similarity
out <- lapply(pnts_lks_all, function(x){
  
  Y <- space_similarity(formula = chain ~ x + y, data = x, pb = FALSE, type = "density.overlap")
  
  Y$mean.overlap <- Y$mean.overlap #-  mean(Y$mean.overlap)
  # Y$overlap <- Y$overlap / max(Y$overlap)
  
  return(Y)  
})

topo_similarity_all <- do.call(rbind, out)

topo_similarity_all2 <- topo_similarity_all[, c("group.1",  "group.2", "overlap.1in2")]
names(topo_similarity_all2) <- c("group.1", "group.2", "mean.overlap")

topo_similarity_all3 <- topo_similarity_all[, c("group.2",  "group.1", "overlap.2in1")]
names(topo_similarity_all3) <- c("group.2", "group.1", "mean.overlap")

topo_similarity_all <- rbind(topo_similarity_all2, topo_similarity_all3)

topo_similarity_all$alignment <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_similarity_all$data.set <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_similarity_all$fossils <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_similarity_all$models <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])


topo_similarity_all$lek <- substr(rownames(topo_similarity_all), 0 , 3)

tab <- as.data.frame(table(topo_similarity_all$lek, topo_similarity_all$alignment))
names(tab) <- c("lek", "model", "count")

tab

lek	model	count
BR1	all.equal	18
CCE	all.equal	76
HC1	all.equal	76
SUR	all.equal	76
TR1	all.equal	18
BR1	optimal	10
CCE	optimal	44
HC1	optimal	44
SUR	optimal	44
TR1	optimal	10
BR1	prank	2
CCE	prank	12
HC1	prank	12
SUR	prank	12
TR1	prank	2

Early fossils

Code

out <- lapply(pnts_lks_early, function(x){
  
  Y <- space_similarity(formula = chain ~ x + y, data = x, pb = FALSE, type = "mean.density.overlap")
  
  Y$mean.overlap <- Y$mean.overlap -  mean(Y$mean.overlap)
  # Y$overlap <- Y$overlap / max(Y$overlap)
  
  return(Y)  
})

topo_similarity_early <- do.call(rbind, out)

topo_similarity_early2 <- topo_similarity_early[, c("group.1",  "group.2", "overlap.2in1")]
names(topo_similarity_early2) <- c("group.2", "group.1", "mean.overlap")

topo_similarity_early3 <- topo_similarity_early[, c("group.1",  "group.2", "overlap.1in2")]
names(topo_similarity_early3) <- c("group.2", "group.1", "mean.overlap")

topo_similarity_early <- rbind(topo_similarity_early2, topo_similarity_early3)

topo_similarity_early$alignment <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_similarity_early$data.set <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_similarity_early$fossils <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_similarity_early$models <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])

topo_similarity_early$lek <- substr(rownames(topo_similarity_early), 0 , 3)

tab <- as.data.frame(table(topo_similarity_early$lek, topo_similarity_early$alignment))
names(tab) <- c("lek", "model", "count")

tab

lek	model	count
BR1	all.equal	2
CCE	all.equal	12
HC1	all.equal	12
SUR	all.equal	12
TR1	all.equal	2
BR1	optimal	10
CCE	optimal	44
HC1	optimal	44
SUR	optimal	44
TR1	optimal	10
BR1	prank	18
CCE	prank	76
HC1	prank	76
SUR	prank	76
TR1	prank	18

4.2 Plot dissimilarity by treatment

Code

topo_similarity_all$fossils <- "all"
topo_similarity_early$fossils <- "early"

shared_columns <- intersect(names(topo_similarity_all), names(topo_similarity_early))

topo_similarity <- rbind(topo_similarity_all[, shared_columns], topo_similarity_early[, shared_columns])

topo_similarity$dissimilarity <- 1 - topo_similarity$mean.overlap

# raincloud plot:
ggplot(topo_similarity,
       aes(x = alignment, y = dissimilarity, fill = alignment)) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
           scale_x_discrete(labels = c(
        "natural" = "Natural",
        "artificial" = "Artificial",
        "simulated" = "Simulated"
      )) +
      theme(legend.position = "none") +
      facet_wrap(~ data.set + models + fossils, ncol = 2) +
  labs(x = "Alignment", y = "Topologic space dissimilarity")

4.3 Stats

4.3.1 Dissimilarity of the topological space

Code

# topo_similarity$dissimilarity[topo_similarity$dissimilarity <= 0] <- 0.001 

topo_similarity$alignment <- factor(topo_similarity$alignment, levels = c("all.equal", "optimal", "prank"))

topo_similarity$data.set <- factor(topo_similarity$data.set, levels = c("new", "old"))

topo_similarity$models <- factor(topo_similarity$models, levels = c("Uexp", "global"))

topo_similarity$fossils <- factor(topo_similarity$fossils, levels = c("early", "all"))

# weakly informative priors
priors <- c(
  prior(normal(0, 5), class = "Intercept"),
  prior(normal(0, 2), class = "b"),
  prior(exponential(1), class = "sd")
)

mod <- brm(
  formula = dissimilarity ~ alignment + data.set + models + fossils + (1 | lek), 
  data = topo_similarity,
  iter = iterations,
  family = gaussian(),
  chains = chains,
  cores = chains,
  prior = priors,
  save_pars = save_pars(all = TRUE),
   control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/regression_results_topological_similiarity",
  file_refit = "always")

mod <- add_criterion(mod, criterion = c("loo"))

# weakly informative priors
priors <- c(
  prior(normal(0, 5), class = "Intercept"),
  prior(exponential(1), class = "sd")
)

null_mod <- brm(
  formula = dissimilarity ~ 1 + (1 | lek), 
  data = topo_similarity,
  iter = iterations,
  family = gaussian(),
  chains = chains,
  cores = chains,
  prior = priors,
  save_pars = save_pars(all = TRUE),
  control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/null_regression_results_topological_similiarity",
  file_refit = "always"
)

null_mod <- add_criterion(null_mod, criterion = c("loo"))

Model selection:

Code

mod <- readRDS("./data/processed/regression_results_topological_similiarity.rds")

null_mod <- readRDS("./data/processed/null_regression_results_topological_similiarity.rds")

loo_diff <- loo::loo_compare(mod, null_mod)

as.data.frame(loo_diff[,1:2], row.names = c("model", "null model"))

	elpd_diff	se_diff
model	0.00000	0.000000
null model	-34.39049	9.235735

Code

looic_df <- as.data.frame(rbind(loo(mod)$estimates[3, ],  loo(null_mod)$estimates[3, ]),row.names = c("model", "null model"))

names(looic_df)[1] <- "LOOIC"

looic_df$delta <- looic_df$LOOIC - min(looic_df$LOOIC)

looic_df

	LOOIC	SE	delta
model	637.3852	39.31947	0.00000
null model	706.1661	35.42949	68.78099

Bayes factor:

Code

# Calculate Bayes factor
bf <- bayes_factor(mod, null_mod)

Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6

Code

bf$bf

[1] 40198678

Code

extended_summary(fit = mod,
    n.posterior = 1000,
    fill =  mako(10)[7],
    trace.palette = viridis,
    remove.intercepts = TRUE,
    highlight = TRUE,
    trace = TRUE,
    return = FALSE, 
    print.name = FALSE,
    spread.type = "HPDI"    
)

	priors	formula	iterations	chains	thinning	warmup	diverg_transitions	rhats > 1.05	min_bulk_ESS	min_tail_ESS	seed
1	b-normal(0, 2) Intercept-normal(0, 5) sd-exponential(1) sigma-student_t(3, 0, 2.5)	dissimilarity ~ alignment + data.set + models + fossils + (1 \| lek)	5000	4	1	2500	0 (0%)	0	5569.35	6172.415	1784803372

	Estimate	l-95% CI	u-95% CI	Rhat	Bulk_ESS	Tail_ESS
b_alignmentoptimal	0.008	-0.047	0.069	1.001	6623.783	7290.831
b_alignmentprank	0.110	0.043	0.180	1	5569.350	6172.415
b_data.setold	0.174	0.130	0.224	1	9212.587	6984.457
b_modelsglobal	0.030	-0.015	0.076	1	10553.290	7260.399
b_fossilsall	0.176	0.119	0.235	1.001	6321.103	6978.831

Contrasts for alignment strategies:

Code

# contrasts
contrasts(
    fit = mod,
    sort.levels = c("prank", "optimal", "all.equal"),
    predictor = "alignment",
    n.posterior = 2000,
    level.sep = " VS ",
    html.table = TRUE,
    plot = TRUE,
    highlight = TRUE,
    fill = mako(10)[7]
)

	Contrasts	Estimate	Est.Error	l-95% CI	u-95% CI
1	prank VS optimal	0.102	0.029	0.045	0.159
2	prank VS all.equal	0.110	0.035	0.043	0.180
3	optimal VS all.equal	0.008	0.030	-0.049	0.067

Code

custom_ppc(fit = mod, group = "alignment")

Session information

R version 4.4.1 (2024-06-14)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.4 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Costa_Rica
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] ggridges_0.5.6       cowplot_1.1.3        brmsish_1.0.0       
 [4] brms_2.22.0          Rcpp_1.0.13          PhenotypeSpace_0.1.0
 [7] ape_5.8              ggplot2_3.5.1        kableExtra_1.4.0    
[10] knitr_1.48           viridis_0.6.5        viridisLite_0.4.2   
[13] pbapply_1.7-2       

loaded via a namespace (and not attached):
  [1] tensorA_0.36.2.1       rstudioapi_0.16.0      jsonlite_1.8.9        
  [4] magrittr_2.0.3         TH.data_1.1-2          estimability_1.5.1    
  [7] spatstat.utils_3.1-0   farver_2.1.2           rmarkdown_2.28        
 [10] vctrs_0.6.5            spatstat.explore_3.3-2 terra_1.7-78          
 [13] htmltools_0.5.8.1      curl_5.2.3             distributional_0.5.0  
 [16] tidybayes_3.0.7        raster_3.6-26          StanHeaders_2.32.10   
 [19] KernSmooth_2.23-24     htmlwidgets_1.6.4      plyr_1.8.9            
 [22] sandwich_3.1-0         emmeans_1.10.3         zoo_1.8-12            
 [25] lifecycle_1.0.4        pkgconfig_2.0.3        Matrix_1.7-0          
 [28] R6_2.5.1               fastmap_1.2.0          digest_0.6.37         
 [31] colorspace_2.1-1       CircStats_0.2-6        tensor_1.5            
 [34] vegan_2.6-8            labeling_0.4.3         fansi_1.0.6           
 [37] spatstat.sparse_3.1-0  adehabitatHR_0.4.22    polyclip_1.10-7       
 [40] abind_1.4-8            mgcv_1.9-1             compiler_4.4.1        
 [43] proxy_0.4-27           withr_3.0.1            backports_1.5.0       
 [46] inline_0.3.19          DBI_1.2.3              QuickJSR_1.4.0        
 [49] pkgbuild_1.4.4         MASS_7.3-61            classInt_0.4-10       
 [52] loo_2.8.0              permute_0.9-7          tools_4.4.1           
 [55] units_0.8-5            goftest_1.2-3          glue_1.8.0            
 [58] nlme_3.1-165           grid_4.4.1             sf_1.0-17             
 [61] checkmate_2.3.2        reshape2_1.4.4         cluster_2.1.6         
 [64] ade4_1.7-22            generics_0.1.3         gtable_0.3.5          
 [67] spatstat.data_3.1-2    class_7.3-22           tidyr_1.3.1           
 [70] sp_2.1-4               xml2_1.3.6             utf8_1.2.4            
 [73] spatstat.geom_3.3-2    pillar_1.9.0           ggdist_3.3.2          
 [76] stringr_1.5.1          posterior_1.6.0        splines_4.4.1         
 [79] dplyr_1.1.4            lattice_0.22-6         gghalves_0.1.4        
 [82] survival_3.7-0         deldir_2.0-4           tidyselect_1.2.1      
 [85] arrayhelpers_1.1-0     gridExtra_2.3          V8_4.4.2              
 [88] svglite_2.1.3          stats4_4.4.1           xfun_0.48             
 [91] adehabitatMA_0.3.17    bridgesampling_1.1-2   matrixStats_1.4.1     
 [94] adehabitatLT_0.3.28    rstan_2.32.6           stringi_1.8.4         
 [97] yaml_2.3.10            boot_1.3-30            evaluate_1.0.1        
[100] codetools_0.2-20       tibble_3.2.1           cli_3.6.3             
[103] RcppParallel_5.1.9     xtable_1.8-4           systemfonts_1.1.0     
[106] munsell_0.5.1          spatstat.random_3.3-1  coda_0.19-4.1         
[109] svUnit_1.0.6           spatstat.univar_3.0-1  parallel_4.4.1        
[112] rstantools_2.4.0       bayesplot_1.11.1       Brobdingnag_1.2-9     
[115] mvtnorm_1.3-1          scales_1.3.0           e1071_1.7-16          
[118] purrr_1.0.2            rlang_1.1.4            multcomp_1.4-25