Tree topology diagnostics

Long-billed hermit song cultural evolution

Author
Published

October 28, 2024

Source code and data found at https://github.com/maRce10/lbh_cultural_evolution_revBayes

Load packages

Custom functions and global options

Code
knitr::opts_chunk$set(out.width = "100%", out.height = "100%", echo = TRUE) 

source('~/Dropbox/Projects/lbh_cultural_evolution_revBayes/source/custom_treespace.R')

theme_set(theme_classic(base_size = 10))

fill_color <- viridis::mako(10, alpha = 0.5)[7]

iterations <- 5000
chains <- 4
priors <- c(set_prior("normal(0, 1.5)", class = "Intercept"), set_prior("normal(0, 1.5)", class = "b"))

# to create several posterior predictive check plots out of a brms fit
custom_ppc1 <- function(fit, group = NULL, ndraws = 30) {
  
    ppc_dens <-
        pp_check(fit,
                 ndraws = ndraws,
                 type = 'dens_overlay_grouped',
                 group = group)  # shows dens_overlay plot by default
    pp_mean <-
        pp_check(
            fit,
            type = "stat_grouped",
            stat = "mean",
            group = group,
            ndraws = ndraws
        )
    pp_scat <-
        pp_check(fit,
                 type = "scatter_avg_grouped",
                 group = group,
                 ndraws = ndraws)
    pp_stat2 <- pp_check(fit, type = "stat_2d", ndraws = ndraws)
    pp_loo <- pp_check(fit, type = "loo_pit_qq", ndraws = ndraws)
    pp_error <-
        pp_check(fit,
                 type = "error_scatter_avg_grouped",
                 ndraws = ndraws,
                 group = group)
    plot_l <-
        list(ppc_dens, pp_mean, pp_scat, pp_error,  pp_stat2, pp_loo)
    
    plot_l[c(1, 3:length(plot_l))] <-
        lapply(plot_l[c(1, 3:length(plot_l))], function(x)
            x  + scale_color_viridis_d(
                begin = 0.1,
                end = 0.8,
                alpha = 0.5
            ) + scale_fill_viridis_d(
                begin = 0.1,
                end = 0.8,
                alpha = 0.5
            ) + theme_classic())     

    print(plot_grid(plotlist = plot_l, ncol = 2))
}

custom_ppc <- function(...) suppressMessages(custom_ppc1(...))

Read data

Code
rb.trees <- readRDS("./output/revbayes_output_in_single_R_object.RDS")

rb.trees <- rb.trees[grep("run_", names(rb.trees), invert = TRUE, value = TRUE)]

trees_diags <- data.frame(model = names(rb.trees))

# get lek name
trees_diags$lek <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 1)

# substitution model
trees_diags$subs <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 2)

# period
trees_diags$period <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 3)

# and which fossils were used
trees_diags$fossils <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 4)

trees_diags$align <- sapply(strsplit(trees_diags$model, "_", fixed = TRUE), "[", 5)

trees_diags$n_trees <- sapply(rb.trees, function(x) length(x$trees))

# Number of models by iterations and lek
kbl <-
    knitr::kable(as.matrix(table(trees_diags$lek, trees_diags$n_trees)), caption = "Number of models with a specific number of trees by lek")

kableExtra::kable_styling(kbl)

# Number of models by iterations and lek
kbl <-
    knitr::kable(as.matrix(table(trees_diags$lek, trees_diags$subs)), caption = "Number of models with a specific number of trees by lek")

kableExtra::kable_styling(kbl)

1 Topological space estimation

  • Pairwise topological distance between all trees for each lek
  • 100 trees for each model evenly spaced along the chain
  • Comparing based on shared song types (pairwise shared tips)
  • Some trees were removed if NAs were produced when comparing topologies (i.e. non-comparable topologies)

1.1 All fossils

Code
#selected leks
sel_leks <- c("SUR", "CCE", "HC1", "BR1", "TR1")

tree_names <- grep("early", names(rb.trees), invert = TRUE, value = TRUE)

pnts_lks_all <- lapply(sel_leks, function(i) {
  
  print(i)
  lek_trees <- grep(i, tree_names, value = TRUE)
  
  multi_list <- lapply(lek_trees, function(x) read.tree(file.path("./output/most_recent_revbayes_models/", x), keep.multi = TRUE))
  
  names(multi_list) <- sapply(basename(lek_trees), function(x) paste(strsplit(x, "_", fixed = TRUE)[[1]][1:5], collapse = "_"))
  
  pnts <- try(custom_treespace(multi_list, n.points = 100, method = "RF", cl = 10), silent = TRUE)

  if (!is(pnts, "try-error")) 
    return(pnts) else
      return(NULL)
})

names(pnts_lks_all) <- sel_leks

sapply(pnts_lks_all, is.null)

saveRDS(pnts_lks_all, file = "./output/topology/tree_distance_all_fossils_pairwise_shared.RDS")

1.2 Early fossils

Code
#selected leks
sel_leks <- c("SUR", "CCE", "HC1", "BR1", "TR1")
# sel_leks <- c("SUR", "TR1")

tree_names <- grep("early", names(rb.trees), value = TRUE)

pnts_lks_early <- lapply(sel_leks, function(i) {
  
  print(i)
  lek_trees <- grep(i, tree_names, value = TRUE)
  
  multi_list <- lapply(lek_trees, function(x) read.tree(file.path("./output/most_recent_revbayes_models/", x), keep.multi = TRUE))
  
  names(multi_list) <- sapply(basename(lek_trees), function(x) paste(strsplit(x, "_", fixed = TRUE)[[1]][1:5], collapse = "_"))
  
  pnts <- try(custom_treespace(multi_list, n.points = 100, method = "RF", cl = 10), silent = TRUE)

  if (!is(pnts, "try-error")) 
    return(pnts) else
      return(NULL)
})

names(pnts_lks_early) <- sel_leks

sapply(pnts_lks_early, is.null)

saveRDS(pnts_lks_early, file = "./output/topology/tree_distance_early_fossils_pairwise_shared.RDS")

2 Plot topological spaces

2.1 All fossils

2.1.1 New and old data sets

Code
pnts_lks_all <- readRDS("./output/topology/tree_distance_all_fossils_pairwise_shared.RDS")

ggs <- lapply(pnts_lks_all, function(X) {

    ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, ncol = 2) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") + 
      ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain)))) 
    })

ggs
$SUR


$CCE


$HC1


$BR1


$TR1

2.1.2 Old data set

Code
ggs <- lapply(pnts_lks_all, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  
  if (nrow(X) > 0)
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
    })

ggs
$SUR


$CCE


$HC1


$BR1
NULL

$TR1
NULL

2.1.3 New data set

Code
ggs <- lapply(pnts_lks_all, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs
$SUR


$CCE


$HC1


$BR1


$TR1

2.1.3.1 Clumping molecular clocks

2.1.3.1.1 Old data set
Code
ggs <- lapply(pnts_lks_all, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
    if (nrow(X) > 0)
    ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(option = "G", begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs
$SUR


$CCE


$HC1


$BR1
NULL

$TR1
NULL
2.1.3.1.2 New data set
Code
ggs <- lapply(pnts_lks_all, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
  ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(option = "G", begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %d trees", nrow(X) / length(unique(X$chain))))

    })

ggs
$SUR


$CCE


$HC1


$BR1


$TR1

2.2 Early fossils

2.2.1 New and old data sets

Code
pnts_lks_early <- readRDS("./output/topology/tree_distance_early_fossils_pairwise_shared.RDS")

ggs <- lapply(pnts_lks_early, function(X) {

    ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") + 
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain)))) 

    })

ggs
$SUR


$CCE


$HC1


$BR1


$TR1

2.2.2 New data set

Code
ggs <- lapply(pnts_lks_early, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
  })

ggs
$SUR


$CCE


$HC1


$BR1


$TR1

2.2.3 Old data set

Code
ggs <- lapply(pnts_lks_early, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  
  if (nrow(X) > 0)
  ggplot(data = X, aes(x = x, y = y, fill = generation)) +
    geom_path(alpha = 0.25, aes(colour = generation), size = 0.75, show.legend = FALSE) +
    scale_colour_gradient(low = "red", high = "yellow") +
    geom_point(shape = 21, size = 4, colour = "white", alpha = 0.6) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_fill_gradientn(colours = mako(256)) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))
  })

ggs
$SUR


$CCE


$HC1


$BR1
NULL

$TR1
NULL

2.2.4 Clumping molecular clocks

2.2.4.1 Old data set

Code
ggs <- lapply(pnts_lks_early, function(X) {
     
  X <- X[grep("old", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
    if (nrow(X) > 0)
    ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(option = "G", begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))

    })

ggs
$SUR


$CCE


$HC1


$BR1
NULL

$TR1
NULL

2.2.4.2 New data set

Code
ggs <- lapply(pnts_lks_early, function(X) {
     
  X <- X[grep("new", X$chain), ]   
  X$clock <- ifelse(grepl("global", X$chain), "Global", "Uexp")
  X$chain <- gsub("_global|_Uexp", "", X$chain)
  
  ggplot(data = X, aes(x = x, y = y)) +
    geom_path(alpha = 0.1, size = 0.75, show.legend = FALSE) +
    geom_point(aes(shape = clock,  col = clock), size = 4) +
      theme(panel.background = element_blank(), axis.line = element_line(color = "grey"),
      panel.spacing = unit(0.1, "lines"), axis.title.x = element_text(vjust = -0.5), axis.title.y = element_text(vjust = 1.5)) +
    facet_wrap(~chain, nrow = 4) +
      scale_color_viridis_d(option = "G", begin = 0.2, end = 0.8, alpha = 0.75) +
      labs(x = "MDS_v1", y = "MDS_v2") +
    ggtitle(label = sprintf("Tree space for %1.0f trees", nrow(X) / length(unique(X$chain))))

    })

ggs
$SUR


$CCE


$HC1


$BR1


$TR1

3 Topological space size

3.1 Observations per lek and alignment

All fossils

Code
# size

out <- lapply(pnts_lks_all, function(x){
  
    Y <- space_size(formula = chain ~ x + y, data = x, pb = FALSE, method = "density")
  Y$mean.c.size <- Y$size# - mean(Y$size)
 
  return(Y)  
})

topo_size_all <- do.call(rbind, out)

topo_size_all$alignment <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_size_all$data.set <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_size_all$fossils <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_size_all$models <- sapply(topo_size_all$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])

topo_size_all$lek <- substr(rownames(topo_size_all), 0 , 3)

tab <- as.data.frame(table(topo_size_all$lek, topo_size_all$alignment))
names(tab) <- c("lek", "model", "count")

tab
lek model count
BR1 all.equal 2
CCE all.equal 4
HC1 all.equal 4
SUR all.equal 4
TR1 all.equal 2
BR1 optimal 2
CCE optimal 4
HC1 optimal 4
SUR optimal 4
TR1 optimal 2
BR1 prank 2
CCE prank 4
HC1 prank 4
SUR prank 4
TR1 prank 2

Early fossils

Code
out <- lapply(pnts_lks_early, function(x){
  
    Y <- space_size(formula = chain ~ x + y, data = x, pb = FALSE, method = "density")
  Y$mean.c.size <- Y$size# - mean(Y$size)
  # Y$size <- Y$size / max(Y$size)
  
  return(Y)  
})

topo_size_early <- do.call(rbind, out)

topo_size_early$alignment <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_size_early$data.set <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_size_early$fossils <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_size_early$models <- sapply(topo_size_early$group, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])

topo_size_early$lek <- substr(rownames(topo_size_early), 0 , 3)

tab <- as.data.frame(table(topo_size_early$lek, topo_size_early$alignment))
names(tab) <- c("lek", "model", "count")

tab
lek model count
BR1 all.equal 2
CCE all.equal 4
HC1 all.equal 4
SUR all.equal 4
TR1 all.equal 2
BR1 optimal 2
CCE optimal 4
HC1 optimal 4
SUR optimal 4
TR1 optimal 2
BR1 prank 2
CCE prank 4
HC1 prank 4
SUR prank 4
TR1 prank 2

3.2 Plot size by treatment

Code
topo_size_all$fossils <- "all"
topo_size_early$fossils <- "early"

shared_columns <- intersect(names(topo_size_all), names(topo_size_early))

topo_size <- rbind(topo_size_all[, shared_columns], topo_size_early[, shared_columns])

# raincloud plot:
ggplot(topo_size,
       aes(x = alignment, y = size, fill = alignment)) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
           scale_x_discrete(labels = c(
        "natural" = "Natural",
        "artificial" = "Artificial",
        "simulated" = "Simulated"
      )) +
      theme(legend.position = "none") +
      facet_wrap(~ data.set + models + fossils, ncol = 2) +
  labs(x = "Alignment", y = "Topologic space dissimilarity") 

3.3 Stats

3.3.1 Size of the topological space

Code
topo_size$alignment <- factor(topo_size$alignment, levels = c("all.equal", "optimal", "prank"))

topo_size$data.set <- factor(topo_size$data.set, levels = c("new", "old"))

topo_size$models <- factor(topo_size$models, levels = c("Uexp", "global"))

topo_size$fossils <- factor(topo_size$fossils, levels = c("early", "all"))

# weakly informative priors
priors <- c(
  prior(normal(0, 5), class = "Intercept"),
  prior(normal(0, 2), class = "b"),
  prior(exponential(1), class = "sd")
)

mod <- brm(
  formula = size ~ alignment + data.set + models + fossils + (1|lek), 
  data = topo_size,
  iter = iterations,
  family = gaussian(),
  chains = chains,
  cores = chains,
  prior = priors,
  save_pars = save_pars(all = TRUE),
   control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/regression_results_topological_size",
  file_refit = "always")

mod <- add_criterion(mod, criterion = c("loo"))

# weakly informative priors
priors <- c(
  prior(normal(0, 5), class = "Intercept"),
  prior(normal(0, 2), class = "b"),
  prior(exponential(1), class = "sd")
)

sub_mod <- brm(
  formula = size ~ data.set + fossils + (1|lek), 
  data = topo_size,
  iter = iterations,
  family = gaussian(),
  chains = chains,
  cores = chains,
  prior = priors,
  save_pars = save_pars(all = TRUE),
   control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/regression_results_topological_size_submodel",
  file_refit = "always")

sub_mod <- add_criterion(sub_mod, criterion = c("loo"))


# weakly informative priors
priors <- c(
  prior(normal(0, 5), class = "Intercept"),
  prior(exponential(1), class = "sd")
)

null_mod <- brm(
  formula = size ~ 1 + (1|lek), 
  data = topo_size,
  iter = iterations,
  family = gaussian(),
  chains = chains,
  cores = chains,
  prior = priors,
  save_pars = save_pars(all = TRUE),
   control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/null_regression_results_topological_size",
  file_refit = "always"
)

null_mod <- add_criterion(null_mod, criterion = c("loo"))

Model selection:

Code
mod <- readRDS("./data/processed/regression_results_topological_size.rds")

sub_mod <- readRDS("./data/processed/regression_results_topological_size_submodel.rds")

null_mod <- readRDS("./data/processed/null_regression_results_topological_size.rds")

loo_diff <- loo::loo_compare(mod, sub_mod, null_mod)

as.data.frame(loo_diff[,1:2], row.names = c("model", "sub_model", "null model"))
elpd_diff se_diff
model 0.0000000 0.000000
sub_model -0.4158121 1.973736
null model -24.8684153 4.150403
Code
looic_df <- as.data.frame(rbind(loo(mod)$estimates[3, ], loo(sub_mod)$estimates[3, ], loo(null_mod)$estimates[3, ]),row.names = c("model", "sub_model", "null model"))

names(looic_df)[1] <- "LOOIC"

looic_df$delta <- looic_df$LOOIC - min(looic_df$LOOIC)

looic_df
LOOIC SE delta
model -488.8457 32.21383 0.8316242
sub_model -489.6774 34.43142 0.0000000
null model -439.9405 32.88068 49.7368306

Bayes factor:

Code
# Calculate Bayes factor
bf <- bayes_factor(mod, null_mod)
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Code
bf$bf
[1] 0.05251836
Code
sbf <- bayes_factor(sub_mod, null_mod)
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Code
sbf$bf
[1] 16580585
Code
extended_summary(fit = mod,
    n.posterior = 1000,
    fill =  mako(10)[7],
    trace.palette = viridis,
    remove.intercepts = TRUE,
    highlight = TRUE,
    trace = TRUE,
    return = FALSE, 
    print.name = FALSE, 
    spread.type = "HPDI"
)
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 b-normal(0, 2) Intercept-normal(0, 5) sd-exponential(1) sigma-student_t(3, 0, 2.5) size ~ alignment + data.set + models + fossils + (1 | lek) 5000 4 1 2500 1 (1e-04%) 0 8279.233 7138.681 316515948
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_alignmentoptimal 0.004 -0.005 0.012 1 8362.105 7450.773
b_alignmentprank -0.005 -0.014 0.003 1 8279.233 7251.602
b_data.setold 0.031 0.023 0.040 1 9693.843 7392.857
b_modelsglobal -0.002 -0.009 0.005 1.001 11404.147 7138.681
b_fossilsall 0.012 0.005 0.019 1 10432.653 7270.792

Contrasts for alignment strategies:

Code
# contrasts
contrasts(
    fit = mod,
    sort.levels = c("prank", "optimal", "all.equal"),
    predictor = "alignment",
    n.posterior = 2000,
    level.sep = " VS ",
    html.table = TRUE,
    plot = TRUE,
    highlight = TRUE,
    fill = mako(10)[7]
)
Contrasts Estimate Est.Error l-95% CI u-95% CI
1 prank VS optimal -0.009 0.005 -0.018 0.000
2 prank VS all.equal -0.005 0.004 -0.014 0.003
3 optimal VS all.equal 0.004 0.004 -0.005 0.013

Code
custom_ppc(fit = mod, group = "alignment")

4 Topological space dissimilarity

4.1 Observations per lek and alignment

All fossils

Code
# similarity
out <- lapply(pnts_lks_all, function(x){
  
  Y <- space_similarity(formula = chain ~ x + y, data = x, pb = FALSE, type = "density.overlap")
  
  Y$mean.overlap <- Y$mean.overlap #-  mean(Y$mean.overlap)
  # Y$overlap <- Y$overlap / max(Y$overlap)
  
  return(Y)  
})

topo_similarity_all <- do.call(rbind, out)

topo_similarity_all2 <- topo_similarity_all[, c("group.1",  "group.2", "overlap.1in2")]
names(topo_similarity_all2) <- c("group.1", "group.2", "mean.overlap")

topo_similarity_all3 <- topo_similarity_all[, c("group.2",  "group.1", "overlap.2in1")]
names(topo_similarity_all3) <- c("group.2", "group.1", "mean.overlap")

topo_similarity_all <- rbind(topo_similarity_all2, topo_similarity_all3)

topo_similarity_all$alignment <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_similarity_all$data.set <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_similarity_all$fossils <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_similarity_all$models <- sapply(topo_similarity_all$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])


topo_similarity_all$lek <- substr(rownames(topo_similarity_all), 0 , 3)

tab <- as.data.frame(table(topo_similarity_all$lek, topo_similarity_all$alignment))
names(tab) <- c("lek", "model", "count")

tab
lek model count
BR1 all.equal 18
CCE all.equal 76
HC1 all.equal 76
SUR all.equal 76
TR1 all.equal 18
BR1 optimal 10
CCE optimal 44
HC1 optimal 44
SUR optimal 44
TR1 optimal 10
BR1 prank 2
CCE prank 12
HC1 prank 12
SUR prank 12
TR1 prank 2

Early fossils

Code
out <- lapply(pnts_lks_early, function(x){
  
  Y <- space_similarity(formula = chain ~ x + y, data = x, pb = FALSE, type = "mean.density.overlap")
  
  Y$mean.overlap <- Y$mean.overlap -  mean(Y$mean.overlap)
  # Y$overlap <- Y$overlap / max(Y$overlap)
  
  return(Y)  
})

topo_similarity_early <- do.call(rbind, out)

topo_similarity_early2 <- topo_similarity_early[, c("group.1",  "group.2", "overlap.2in1")]
names(topo_similarity_early2) <- c("group.2", "group.1", "mean.overlap")

topo_similarity_early3 <- topo_similarity_early[, c("group.1",  "group.2", "overlap.1in2")]
names(topo_similarity_early3) <- c("group.2", "group.1", "mean.overlap")

topo_similarity_early <- rbind(topo_similarity_early2, topo_similarity_early3)

topo_similarity_early$alignment <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][1])

topo_similarity_early$data.set <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][2])

topo_similarity_early$fossils <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][3])

topo_similarity_early$models <- sapply(topo_similarity_early$group.1, function(x) strsplit(x, "_", fixed = TRUE)[[1]][4])

topo_similarity_early$lek <- substr(rownames(topo_similarity_early), 0 , 3)

tab <- as.data.frame(table(topo_similarity_early$lek, topo_similarity_early$alignment))
names(tab) <- c("lek", "model", "count")

tab
lek model count
BR1 all.equal 2
CCE all.equal 12
HC1 all.equal 12
SUR all.equal 12
TR1 all.equal 2
BR1 optimal 10
CCE optimal 44
HC1 optimal 44
SUR optimal 44
TR1 optimal 10
BR1 prank 18
CCE prank 76
HC1 prank 76
SUR prank 76
TR1 prank 18

4.2 Plot dissimilarity by treatment

Code
topo_similarity_all$fossils <- "all"
topo_similarity_early$fossils <- "early"

shared_columns <- intersect(names(topo_similarity_all), names(topo_similarity_early))

topo_similarity <- rbind(topo_similarity_all[, shared_columns], topo_similarity_early[, shared_columns])

topo_similarity$dissimilarity <- 1 - topo_similarity$mean.overlap

# raincloud plot:
ggplot(topo_similarity,
       aes(x = alignment, y = dissimilarity, fill = alignment)) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
           scale_x_discrete(labels = c(
        "natural" = "Natural",
        "artificial" = "Artificial",
        "simulated" = "Simulated"
      )) +
      theme(legend.position = "none") +
      facet_wrap(~ data.set + models + fossils, ncol = 2) +
  labs(x = "Alignment", y = "Topologic space dissimilarity") 

4.3 Stats

4.3.1 Dissimilarity of the topological space

Code
# topo_similarity$dissimilarity[topo_similarity$dissimilarity <= 0] <- 0.001 

topo_similarity$alignment <- factor(topo_similarity$alignment, levels = c("all.equal", "optimal", "prank"))

topo_similarity$data.set <- factor(topo_similarity$data.set, levels = c("new", "old"))

topo_similarity$models <- factor(topo_similarity$models, levels = c("Uexp", "global"))

topo_similarity$fossils <- factor(topo_similarity$fossils, levels = c("early", "all"))

# weakly informative priors
priors <- c(
  prior(normal(0, 5), class = "Intercept"),
  prior(normal(0, 2), class = "b"),
  prior(exponential(1), class = "sd")
)

mod <- brm(
  formula = dissimilarity ~ alignment + data.set + models + fossils + (1 | lek), 
  data = topo_similarity,
  iter = iterations,
  family = gaussian(),
  chains = chains,
  cores = chains,
  prior = priors,
  save_pars = save_pars(all = TRUE),
   control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/regression_results_topological_similiarity",
  file_refit = "always")

mod <- add_criterion(mod, criterion = c("loo"))

# weakly informative priors
priors <- c(
  prior(normal(0, 5), class = "Intercept"),
  prior(exponential(1), class = "sd")
)

null_mod <- brm(
  formula = dissimilarity ~ 1 + (1 | lek), 
  data = topo_similarity,
  iter = iterations,
  family = gaussian(),
  chains = chains,
  cores = chains,
  prior = priors,
  save_pars = save_pars(all = TRUE),
  control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/null_regression_results_topological_similiarity",
  file_refit = "always"
)

null_mod <- add_criterion(null_mod, criterion = c("loo"))

Model selection:

Code
mod <- readRDS("./data/processed/regression_results_topological_similiarity.rds")

null_mod <- readRDS("./data/processed/null_regression_results_topological_similiarity.rds")

loo_diff <- loo::loo_compare(mod, null_mod)

as.data.frame(loo_diff[,1:2], row.names = c("model", "null model"))
elpd_diff se_diff
model 0.00000 0.000000
null model -34.39049 9.235735
Code
looic_df <- as.data.frame(rbind(loo(mod)$estimates[3, ],  loo(null_mod)$estimates[3, ]),row.names = c("model", "null model"))

names(looic_df)[1] <- "LOOIC"

looic_df$delta <- looic_df$LOOIC - min(looic_df$LOOIC)

looic_df
LOOIC SE delta
model 637.3852 39.31947 0.00000
null model 706.1661 35.42949 68.78099

Bayes factor:

Code
# Calculate Bayes factor
bf <- bayes_factor(mod, null_mod)
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Code
bf$bf
[1] 40198678
Code
extended_summary(fit = mod,
    n.posterior = 1000,
    fill =  mako(10)[7],
    trace.palette = viridis,
    remove.intercepts = TRUE,
    highlight = TRUE,
    trace = TRUE,
    return = FALSE, 
    print.name = FALSE,
    spread.type = "HPDI"    
)
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 b-normal(0, 2) Intercept-normal(0, 5) sd-exponential(1) sigma-student_t(3, 0, 2.5) dissimilarity ~ alignment + data.set + models + fossils + (1 | lek) 5000 4 1 2500 0 (0%) 0 5569.35 6172.415 1784803372
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_alignmentoptimal 0.008 -0.047 0.069 1.001 6623.783 7290.831
b_alignmentprank 0.110 0.043 0.180 1 5569.350 6172.415
b_data.setold 0.174 0.130 0.224 1 9212.587 6984.457
b_modelsglobal 0.030 -0.015 0.076 1 10553.290 7260.399
b_fossilsall 0.176 0.119 0.235 1.001 6321.103 6978.831

Contrasts for alignment strategies:

Code
# contrasts
contrasts(
    fit = mod,
    sort.levels = c("prank", "optimal", "all.equal"),
    predictor = "alignment",
    n.posterior = 2000,
    level.sep = " VS ",
    html.table = TRUE,
    plot = TRUE,
    highlight = TRUE,
    fill = mako(10)[7]
)
Contrasts Estimate Est.Error l-95% CI u-95% CI
1 prank VS optimal 0.102 0.029 0.045 0.159
2 prank VS all.equal 0.110 0.035 0.043 0.180
3 optimal VS all.equal 0.008 0.030 -0.049 0.067

Code
custom_ppc(fit = mod, group = "alignment")

Session information

R version 4.4.1 (2024-06-14)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.4 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Costa_Rica
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] ggridges_0.5.6       cowplot_1.1.3        brmsish_1.0.0       
 [4] brms_2.22.0          Rcpp_1.0.13          PhenotypeSpace_0.1.0
 [7] ape_5.8              ggplot2_3.5.1        kableExtra_1.4.0    
[10] knitr_1.48           viridis_0.6.5        viridisLite_0.4.2   
[13] pbapply_1.7-2       

loaded via a namespace (and not attached):
  [1] tensorA_0.36.2.1       rstudioapi_0.16.0      jsonlite_1.8.9        
  [4] magrittr_2.0.3         TH.data_1.1-2          estimability_1.5.1    
  [7] spatstat.utils_3.1-0   farver_2.1.2           rmarkdown_2.28        
 [10] vctrs_0.6.5            spatstat.explore_3.3-2 terra_1.7-78          
 [13] htmltools_0.5.8.1      curl_5.2.3             distributional_0.5.0  
 [16] tidybayes_3.0.7        raster_3.6-26          StanHeaders_2.32.10   
 [19] KernSmooth_2.23-24     htmlwidgets_1.6.4      plyr_1.8.9            
 [22] sandwich_3.1-0         emmeans_1.10.3         zoo_1.8-12            
 [25] lifecycle_1.0.4        pkgconfig_2.0.3        Matrix_1.7-0          
 [28] R6_2.5.1               fastmap_1.2.0          digest_0.6.37         
 [31] colorspace_2.1-1       CircStats_0.2-6        tensor_1.5            
 [34] vegan_2.6-8            labeling_0.4.3         fansi_1.0.6           
 [37] spatstat.sparse_3.1-0  adehabitatHR_0.4.22    polyclip_1.10-7       
 [40] abind_1.4-8            mgcv_1.9-1             compiler_4.4.1        
 [43] proxy_0.4-27           withr_3.0.1            backports_1.5.0       
 [46] inline_0.3.19          DBI_1.2.3              QuickJSR_1.4.0        
 [49] pkgbuild_1.4.4         MASS_7.3-61            classInt_0.4-10       
 [52] loo_2.8.0              permute_0.9-7          tools_4.4.1           
 [55] units_0.8-5            goftest_1.2-3          glue_1.8.0            
 [58] nlme_3.1-165           grid_4.4.1             sf_1.0-17             
 [61] checkmate_2.3.2        reshape2_1.4.4         cluster_2.1.6         
 [64] ade4_1.7-22            generics_0.1.3         gtable_0.3.5          
 [67] spatstat.data_3.1-2    class_7.3-22           tidyr_1.3.1           
 [70] sp_2.1-4               xml2_1.3.6             utf8_1.2.4            
 [73] spatstat.geom_3.3-2    pillar_1.9.0           ggdist_3.3.2          
 [76] stringr_1.5.1          posterior_1.6.0        splines_4.4.1         
 [79] dplyr_1.1.4            lattice_0.22-6         gghalves_0.1.4        
 [82] survival_3.7-0         deldir_2.0-4           tidyselect_1.2.1      
 [85] arrayhelpers_1.1-0     gridExtra_2.3          V8_4.4.2              
 [88] svglite_2.1.3          stats4_4.4.1           xfun_0.48             
 [91] adehabitatMA_0.3.17    bridgesampling_1.1-2   matrixStats_1.4.1     
 [94] adehabitatLT_0.3.28    rstan_2.32.6           stringi_1.8.4         
 [97] yaml_2.3.10            boot_1.3-30            evaluate_1.0.1        
[100] codetools_0.2-20       tibble_3.2.1           cli_3.6.3             
[103] RcppParallel_5.1.9     xtable_1.8-4           systemfonts_1.1.0     
[106] munsell_0.5.1          spatstat.random_3.3-1  coda_0.19-4.1         
[109] svUnit_1.0.6           spatstat.univar_3.0-1  parallel_4.4.1        
[112] rstantools_2.4.0       bayesplot_1.11.1       Brobdingnag_1.2-9     
[115] mvtnorm_1.3-1          scales_1.3.0           e1071_1.7-16          
[118] purrr_1.0.2            rlang_1.1.4            multcomp_1.4-25