Exploring birdNET embeddings

Heterospecific vocal mimicry in a bellbird

Author

Published

March 5, 2026

Source code and data found at https://github.com/maRce10/vocal_mimicry_bellbird

Code

# options to customize chunk outputs
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 65), 
  tidy = TRUE,
  message = FALSE
 )

Purpose

Evaluate similarity of vocalizations of a mimetic bellbird and its model species using birdNET embeddings

Load packages

Code

# knitr is require for creating html/pdf/word reports
# formatR is used for soft-wrapping code

# install knitr package if not installed
if (!requireNamespace("sketchy", quietly = TRUE)) {
  install.packages("sketchy")
}

packages <- c(
  "knitr",
  "Rtsne",
  "ggplot2",
  "viridis",
  "umap",
  "xgboost",
  "caret",
  "doParallel",
  "reshape2",
  "pROC",
  "smotefamily",
  "brms",
  "cowplot"
)

# install/ load packages
sketchy::load_packages(packages = packages)

Set variables and functions

Code

# colors and shapes by species for scatterplots
cols <- c(adjustcolor("tomato", alpha.f = 0.4), mako(4, alpha = 0.4, begin = 0.2, end = 0.8))

shapes <- c(16, 19, 15, 18, 17)

names(shapes) <- names(cols) <- c(
  "mimetic_bellbird",
  "Procnias tricarunculatus",
  "Psarocolius montezuma",
  "Ramphastos sulfuratus",
  "Pteroglossus torquatus"
)

theme_set(theme_classic(base_size = 20))

# set method parameters
ctrl <- caret::trainControl(method = "repeatedcv", repeats = 5)

plot_overlap_props <- function(df, method = "mcp.overlap", formula = model_sp ~ tsne1 + tsne2){

ovlps <- space_similarity(
  formula,
  data = df,
  nperm = 1000,
  pb = FALSE,
  method = "mcp.overlap",
  trControl = ctrl,
  tuneLength = 10,
  seed = 123
)

ovlps1.2 <- data.frame(group.1 = ovlps$group.1, group.2 = ovlps$group.2, ovlp = ovlps[, 3])
ovlps2.1 <- data.frame(group.1 = ovlps$group.2, group.2 = ovlps$group.1, ovlp = ovlps[, 4])

ovlps <- rbind(ovlps1.2, ovlps2.1)

gg <- ggplot(ovlps, aes(x = group.1, y = group.2, fill = ovlp)) +
  geom_tile() +
  theme_bw() +
  coord_equal() +
    scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
  geom_text(aes(label = round(ovlp, 2)), color = "black") +
  theme_classic() +
  theme(
    axis.text.x = element_text(
      color = "black",
      size = 11,
      angle = 30,
      vjust = 0.8,
      hjust = 0.8
    )
  ) +
    labs(x = "", y = "", fill = "Proportional\noverlap")

return(gg)
}

# to create several posterior predictive check plots out of a brms fit
custom_ppc <- function(fit, group = NULL, ndraws = 30) {
  by_group  <- if (!is.null(group)) {
    TRUE
  } else
    FALSE
  
  if (by_group)
    by_group  <-  if (any(names(fit$data) == group)) {
      TRUE
    } else
      FALSE
  
  if (by_group)
    by_group <-
      if (is.character(fit$data[, group]) |
          is.factor(fit$data[, group])) {
        TRUE
      } else
        FALSE
  
  
  if (by_group) {
    ppc_dens <- pp_check(fit,
                         ndraws = ndraws,
                         type = 'dens_overlay_grouped',
                         group = group)
    
    pp_mean <- pp_check(
      fit,
      type = "stat_grouped",
      stat = "mean",
      group = group,
      ndraws = ndraws
    )  + theme_classic()
    
    pp_scat <- pp_check(fit, type = "scatter_avg", # group = group,
                        ndraws = ndraws)
  } else {
    ppc_dens <- pp_check(fit, ndraws = ndraws, type = 'dens_overlay')
    
    pp_mean <- pp_check(fit,
                        type = "stat",
                        stat = "mean",
                        ndraws = ndraws) + theme_classic()
    
    pp_scat <-  pp_check(fit, type = "scatter_avg", ndraws = ndraws)
  }
  
  pp_stat2 <- pp_check(fit, type = "stat_2d", ndraws = ndraws)
  
  pp_plot_list <-
    list(ppc_dens, pp_mean, pp_scat, pp_stat2)
  
  pp_plot_list[c(1, 3:4)] <-
    lapply(pp_plot_list[c(1, 3:4)], function(x)
      x  + scale_color_viridis_d(
        begin = 0.3,
        end = 0.8,
        alpha = 0.5,
        option = "mako",
      ) + scale_fill_viridis_d(
        begin = 0.3,
        end = 0.8,
        alpha = 0.5,
        option = "mako"
      ) + theme_classic())
  
  
  ppc_plot <- plot_grid(plotlist = pp_plot_list, ncol = 2)
  
  print(ppc_plot)
}

1 Import data

Add metadata:

Code

raw_embs <- read.csv("data/processed/birdnet_predictions_and_embeddings.csv")

bb_est <- readRDS(
  "./data/processed/extended_selection_table_high_snr_models_and_allmimetic_sounds.RDS"
)

#add source
raw_embs$source <- sapply(raw_embs$sound_file, function(x) bb_est$source[paste0(attr(bb_est, "check.res")$sound.files, ".wav") == x][1])
raw_embs$model.species <- sapply(raw_embs$sound_file, function(x) bb_est$Model.species[paste0(attr(bb_est, "check.res")$sound.files, ".wav") == x][1])
raw_embs$vocalization <- sapply(raw_embs$sound_file, function(x) bb_est$Vocalization[paste0(attr(bb_est, "check.res")$sound.files, ".wav") == x][1])
raw_embs$species.vocalization <- sapply(raw_embs$sound_file, function(x) bb_est$species.vocalization[paste0(attr(bb_est, "check.res")$sound.files, ".wav") == x][1])

raw_embs$orig.sound.files <- sapply(raw_embs$sound_file, function(x) attr(bb_est, "check.res")$orig.sound.files[paste0(attr(bb_est, "check.res")$sound.files, ".wav") == x][1])

write.csv(raw_embs, "data/processed/birdnet_predictions_and_embeddings_with_metadata.csv", row.names = FALSE)

Sample sizes:

Code

embs <- read.csv("data/processed/birdnet_predictions_and_embeddings_with_metadata.csv")

# select only model species
model_sp_embs <- embs[grep("mimetic|babbling", embs$species.vocalization, invert = TRUE), ]

table(embs$source)


        mimetic_bellbird Procnias tricarunculatus    Psarocolius montezuma 
                    1660                      715                       74 
  Pteroglossus torquatus    Ramphastos sulfuratus 
                      64                     1272

Code

table(embs$species.vocalization)


       mimetic_P.tricarunculatus-adult    mimetic_P.tricarunculatus-aggresive 
                                   250                                     46 
    mimetic_P.tricarunculatus-babbling  mimetic_P.tricarunculatus-P.montezuma 
                                   177                                     17 
 mimetic_P.tricarunculatus-P.torquatus mimetic_P.tricarunculatus-R.sulfuratus 
                                   224                                    920 
     mimetic_P.tricarunculatus-whistle                      P.montezuma-model 
                                    26                                     74 
                     P.torquatus-model             P.tricarunculatus-babbling 
                                    64                                     16 
          P.tricarunculatus-Monteverde            P.tricarunculatus-Nicaragua 
                                   287                                    225 
              P.tricarunculatus-Panama            P.tricarunculatus-Talamanca 
                                    55                                    132 
                    R.sulfuratus-model 
                                  1272

Code

# Find column indices that match pattern
embedding_columns <- grep("^logit_", names(embs), value = TRUE)

# Check results
cat("Number of embedding columns:", length(embedding_columns), "\n")

Number of embedding columns: 6522

1.1 T-sne for model species

Perplexity = 30

Code

tsne_out <- Rtsne(as.matrix(model_sp_embs[, embedding_columns]))

tsne_model_species <- data.frame(model_sp = model_sp_embs$source, tsne1 = tsne_out$Y[,1], tsne2 = tsne_out$Y[,2])

saveRDS(tsne_model_species, "data/processed/tsne_dims_model_species.rds")

Code

tsne_only_model_species <- readRDS("data/processed/tsne_dims_model_species.rds")

ggplot(tsne_only_model_species, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
    scale_color_manual(values = cols[names(cols) %in% unique(tsne_only_model_species$model_sp)]) +
    scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_only_model_species$model_sp)]) +
    theme_classic()

Perplexity = 5

T-sne including mimetic bellbird

Code

embs_mimetic_and_models <- embs[grep("babbling", embs$species.vocalization, invert = TRUE), ]

tsne_out <- Rtsne(as.matrix(embs_mimetic_and_models[, embedding_columns]),  num_threads = 30, perplexity = 5)

tsne_all <- data.frame(model_sp = embs_mimetic_and_models$source, sp.vocalization = embs_mimetic_and_models$species.vocalization, tsne1 = tsne_out$Y[,1], tsne2 = tsne_out$Y[,2])

saveRDS(tsne_all, "data/processed/tsne_dims_all_low_perplexity.rds")

Plotting only model species

Code

tsne_all <- readRDS("data/processed/tsne_dims_all_low_perplexity.rds")

tsne_model_species <- tsne_all[grep("mimetic|babbling", tsne_all$sp.vocalization, invert = TRUE), ]

ggplot(tsne_model_species, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
    scale_color_manual(values = cols[names(cols) %in% unique(tsne_model_species$model_sp)]) +
      scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_model_species$model_sp)]) +
    theme_classic()

Including mimetic bellbird

Code

ggplot(tsne_all, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
    scale_color_manual(values = cols[names(cols) %in% unique(tsne_all$model_sp)]) +
      scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_all$model_sp)]) +
    theme_classic()

Perplexity = 30

T-sne including mimetic bellbird

Code

embs_mimetic_and_models <- embs[grep("babbling", embs$species.vocalization, invert = TRUE), ]

tsne_out <- Rtsne(as.matrix(embs_mimetic_and_models[, embedding_columns]),  num_threads = 30, perplexity = 30)

tsne_all <- data.frame(model_sp = embs_mimetic_and_models$source, sp.vocalization = embs_mimetic_and_models$species.vocalization, tsne1 = tsne_out$Y[,1], tsne2 = tsne_out$Y[,2])

saveRDS(tsne_all, "data/processed/tsne_dims_all_mid_perplexity.rds")

Plotting only model species

Code

tsne_all <- readRDS("data/processed/tsne_dims_all_mid_perplexity.rds")

tsne_model_species <- tsne_all[grep("mimetic|babbling", tsne_all$sp.vocalization, invert = TRUE), ]

ggplot(tsne_model_species, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
    scale_color_manual(values = cols[names(cols) %in% unique(tsne_model_species$model_sp)]) +
      scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_model_species$model_sp)]) +
    theme_classic()

Including mimetic bellbird

Code

ggplot(tsne_all, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
    scale_color_manual(values = cols[names(cols) %in% unique(tsne_all$model_sp)]) +
      scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_all$model_sp)]) +
    theme_classic()

Perplexity = 50

T-sne including mimetic bellbird

Code

embs_mimetic_and_models <- embs[grep("babbling", embs$species.vocalization, invert = TRUE), ]

tsne_out <- Rtsne(as.matrix(embs_mimetic_and_models[, embedding_columns]),  num_threads = 30, perplexity = 50)

tsne_all <- data.frame(
  model_sp = embs_mimetic_and_models$source,
  sp.vocalization = embs_mimetic_and_models$species.vocalization,
  tsne1 = tsne_out$Y[, 1],
  tsne2 = tsne_out$Y[, 2]
)

saveRDS(tsne_all, "data/processed/tsne_dims_all_high_perplexity.rds")

Plotting only model species

Code

tsne_all <- readRDS("data/processed/tsne_dims_all_high_perplexity.rds")

tsne_model_species <- tsne_all[grep("mimetic|babbling", tsne_all$sp.vocalization, invert = TRUE), ]

ggplot(tsne_model_species, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
    scale_color_manual(values = cols[names(cols) %in% unique(tsne_model_species$model_sp)]) +
      scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_model_species$model_sp)]) +
    theme_classic()

Including mimetic bellbird

Code

ggplot(tsne_all, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
    scale_color_manual(values = cols[names(cols) %in% unique(tsne_all$model_sp)]) +
      scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_all$model_sp)]) +
    theme_classic()

1.2 UMAP

Code

# Configure UMAP
# n_neighbors is the most important parameter (similar to t-SNE perplexity)
umap_config <- umap.defaults
umap_config$n_neighbors <- 15  # Default is 15, good starting point
umap_config$min_dist <- 0.1     # Minimum distance between points (default 0.1)
umap_config$n_components <- 2   # 2D projection
umap_config$random_state <- 42  # For reproducibility

# Run UMAP
set.seed(42)

embs_mimetic_and_models <- embs[grep("babbling", embs$species.vocalization, invert = TRUE), ]

umap_result <- umap(
  as.matrix(embs_mimetic_and_models[, embedding_columns]),
  config = umap_config
)

# Create dataframe for plotting
umap_all <- data.frame(
  model_sp = embs_mimetic_and_models$source,
  sp.vocalization = embs_mimetic_and_models$species.vocalization,
  umap1 = umap_result$layout[, 1],
  umap2 = umap_result$layout[, 2]
)

saveRDS(umap_all, "data/processed/umap_dims_all.rds")

Plotting only model species

Code

umap_all <- readRDS("data/processed/umap_dims_all.rds")

umap_model_species <- umap_all[grep("mimetic|babbling", umap_all$sp.vocalization, invert = TRUE), ]

ggplot(umap_model_species, aes(x = umap1, y = umap2, color = model_sp, shape = model_sp)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(x = "UMAP 1", y = "UMAP 2", color = "", shape = "") +
    scale_color_manual(values = cols[names(cols) %in% unique(umap_model_species$model_sp)]) +
      scale_shape_manual(values = shapes[names(shapes) %in% unique(umap_model_species$model_sp)]) +
    theme_classic()

Code

ggplot(umap_all, aes(x = umap1, y = umap2, color = model_sp, shape = model_sp)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(x = "UMAP 1", y = "UMAP 2", color = "", shape = "") +
    scale_color_manual(values = cols[names(cols) %in% unique(umap_all$model_sp)]) +
      scale_shape_manual(values = shapes[names(shapes) %in% unique(umap_all$model_sp)]) +
    theme_classic()

2 k-nearest neighbor classification

2.1 Prepare data

Code

# Separate features and labels
model_sp_embs <- embs[grep("mimetic|babbling", embs$species.vocalization, invert = TRUE), ]

X <- as.matrix(model_sp_embs[, embedding_columns])
y <- model_sp_embs$source


# Convert to factor (ensure consistent levels)
y <- as.factor(y)
cat("Original class distribution:\n")

Original class distribution:

Code

table(y)

y
Procnias tricarunculatus    Psarocolius montezuma   Pteroglossus torquatus 
                     699                       74                       64 
   Ramphastos sulfuratus 
                    1272

2.2 Train/test split

Code

# use caret to create a 80-20 split of the data for training and testing the model, stratified by source + sound file (species), keeping entire orig.sound.files levels for testing
unique_orig_sound_files <- as.data.frame(model_sp_embs[
  !duplicated(model_sp_embs$orig.sound.files),
  c("orig.sound.files", "source")
])

set.seed(123)

# exclude pteroglossus to add it manually
train_sound_files <- createDataPartition(
  unique_orig_sound_files$source,
  p = .8,
  list = FALSE,
  times = 1
)

train_sound_files <- train_sound_files[unique_orig_sound_files$source[train_sound_files] != "Pteroglossus torquatus"]

# leave  c("Campan_0128.wav", "Campan_0129.wav") for testing
train_sound_files <- c(train_sound_files, which(unique_orig_sound_files$orig.sound.files %in% c("20200805-062852.wav", "20200805-063013.wav", "20200805-063224.wav", "Campan_0003.wav", "Campan_0127.wav", "Pteroglossus-torquatus-154157.wav", "Pteroglossus-torquatus-439335.wav", "Pteroglossus-torquatus-489619.wav", "Pteroglossus-torquatus-526136.wav")))

train_index <- which(model_sp_embs$orig.sound.files %in%
    unique_orig_sound_files$orig.sound.files[train_sound_files])

X_train <- X[train_index, ]
X_test <- X[-train_index, ]
y_train <- y[train_index]
y_test <- y[-train_index]

cat("\nTraining set size:", nrow(X_train))


Training set size: 1719

Code

cat("\nTest set size:", nrow(X_test))


Test set size: 390

Code

cat("\n\nTraining class distribution:\n")



Training class distribution:

Code

print(table(y_train))

y_train
Procnias tricarunculatus    Psarocolius montezuma   Pteroglossus torquatus 
                     566                       64                       56 
   Ramphastos sulfuratus 
                    1033

2.3 Standarize features

Code

# Calculate mean and SD from training set only (to avoid data leakage)
train_mean <- colMeans(X_train)
train_sd <- apply(X_train, 2, sd)

# Standardize both sets
X_train_scaled <- scale(X_train, center = train_mean, scale = train_sd)
X_test_scaled <- scale(X_test, center = train_mean, scale = train_sd)

# Check for any features with zero variance and remove them
zero_var <- which(train_sd == 0 | is.na(train_sd))
if(length(zero_var) > 0) {
  cat("\nRemoving", length(zero_var), "zero-variance features\n")
  X_train_scaled <- X_train_scaled[, -zero_var]
  X_test_scaled <- X_test_scaled[, -zero_var]
}

2.4 Run k-nearest neighbor classification

Code

train_data <- data.frame(X_train_scaled, source = gsub(" ", "_", as.character(y_train)))


# k nearest neighbor with caret
set.seed(123)


ctrl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5,
  sampling = "down",      # this handles imbalance
  classProbs = TRUE
)

set.seed(123)

knn_model <- train(
  source ~ .,
  data = train_data,
  method = "knn",
  trControl = ctrl,
  tuneLength = 20
)

saveRDS(knn_model, "data/processed/knn_model.rds")

Code

knn_model <- readRDS("data/processed/knn_model.rds")

test_data <- data.frame(X_test_scaled, source = gsub(" ", "_", as.character(y_test)))

pred_test <- predict(knn_model, newdata = test_data)


y_test <- gsub(" ", "_", as.character(y_test))
conf_mat <- confusionMatrix(pred_test, as.factor(y_test))


conf_df <- as.data.frame(conf_mat$table)

conf_df$total <- sapply(conf_df$Reference, function(x)
    sum(y_test ==
            x))

conf_df$proportion <- conf_df$Freq / conf_df$total

conf_df <- conf_df[complete.cases(conf_df), ]

conf_df$Reference <- gsub("_", " ", conf_df$Reference)
conf_df$Prediction <- gsub("_", " ", conf_df$Prediction)

ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + theme_bw() + coord_equal() +     scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) + geom_text(aes(label = round(proportion, 2)), color = "black") + labs(x = "Reference model species", y = "Predicted model species") +
    theme_classic() + theme(axis.text.x = element_text(
        color = "black",
        size = 11,
        angle = 30,
        vjust = 0.8,
        hjust = 0.8
    ))

2.4.1 Predict mimetic bellbird vocalizations and evaluate similarity to model species

Code

embs_mimetic_and_models <- embs[grep("babbling", embs$species.vocalization, invert = TRUE), ]

X_mimic_labeled <- embs_mimetic_and_models[embs_mimetic_and_models$source == "mimetic_bellbird" & embs_mimetic_and_models$model.species != "", ]

X_mimic <- X_mimic_labeled[, embedding_columns]

X_mimic_scaled <- scale(X_mimic, center = train_mean, scale = train_sd)


pred_mimic <- X_mimic_labeled$pred_mimic <- predict(knn_model, newdata = X_mimic_scaled)

## ggplot bars for predicted species
ggplot(data.frame(pred_mimic), aes(x = pred_mimic)) +
  geom_bar(fill = mako(10)[9]) +
  theme_classic() +
  labs(x = "Predicted model species", y = "Count") +
  theme(axis.text.x = element_text(
    color = "black",
    size = 11,
    angle = 30,
    vjust = 0.8,
    hjust = 0.8
  )) +
  scale_x_discrete(labels = function(x) gsub("_", " ", x))

2.4.2 Compare to manual labels

Code

y_mimic <- X_mimic_labeled$model.species
y_mimic <- gsub(" ", "_", as.character(y_mimic))
conf_mat <- confusionMatrix(pred_mimic, as.factor(y_mimic))


conf_df <- as.data.frame(conf_mat$table)

conf_df$total <- sapply(conf_df$Reference, function(x)
    sum(y_mimic ==
            x))

conf_df$proportion <- conf_df$Freq / conf_df$total

conf_df <- conf_df[complete.cases(conf_df), ]

conf_df$Reference <- gsub("_", " ", conf_df$Reference)
conf_df$Prediction <- gsub("_", " ", conf_df$Prediction)

ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + theme_bw() + coord_equal() +     scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) + geom_text(aes(label = round(proportion, 2)), color = "black") + labs(x = "Reference model species", y = "Predicted model species") +
    theme_classic() + theme(axis.text.x = element_text(
        color = "black",
        size = 11,
        angle = 30,
        vjust = 0.8,
        hjust = 0.8
    ))

3 Explore mimicry over time

Proportion of predicted model species per recording date, including only dates with at least 10 recordings of model species (to avoid noise from small sample sizes)

Code

embs_babling <- embs[grepl("babbling", embs$species.vocalization) & embs$source == "mimetic_bellbird", ]

embs_babling$pred_mimic <- "babbling"

mimic_data <- rbind(embs_babling[, names(X_mimic_labeled)], X_mimic_labeled)

rec_info <- read.table("./data/raw/Recording_dates_.csv", head = TRUE, sep = ";")

rec_info$date <- gsub("\\/", "-", rec_info$date)
rec_info$date <- gsub("out", "ago", rec_info$date)
rec_info$date <- gsub("set", "sep", rec_info$date)

rec_info$org.date <- rec_info$date
rec_info$date <- as.Date(rec_info$date, format = "%d-%b-%y")


mimic_data$date <- sapply(mimic_data$orig.sound.files, function(x) as.character(rec_info$date[rec_info$sound.files ==
    x][1]))

mimic_data$date <- as.Date(mimic_data$date)

agg_pred <- aggregate(orig.sound.files ~ date + pred_mimic, mimic_data, length)


out <- lapply(unique(agg_pred$date), function(x) {

    X <- agg_pred[agg_pred$date == x, ]
    X$prop <- X$orig.sound.files/sum(X$orig.sound.files)

    
    if (length(unique(X$pred_mimic)) < 4) {

        new_df <- data.frame(date = X$date[1], pred_mimic = setdiff(c("Procnias_tricarunculatus",
            "Pteroglossus_torquatus", "Ramphastos_sulfuratus", "Psarocolius_montezuma", "babbling"), X$pred_mimic),
            orig.sound.files = 0, prop = 0)
  
        X <- rbind(new_df, X)
    }
    
      X$total <- sum(X$orig.sound.files)
    return(X)
})

prop_pred <- do.call(rbind, out)

prop_pred$date.fact <- as.factor(as.character(prop_pred$date))

count_date <- aggregate(orig.sound.files ~ date, prop_pred, sum)


ggplot(prop_pred[prop_pred$date %in% count_date$date[count_date$orig.sound.files >=
    10], ], aes(x = date.fact, y = prop, fill = pred_mimic)) + geom_bar(stat = "identity") +
    scale_fill_viridis_d(alpha = 0.7, end = 0.8, option = "G") + theme_classic() + theme(axis.text.x = element_text(angle = 45,
    vjust = 0.5))

Excluding P. montezuma, which is the only model species that is rarely predicted in the mimetic bellbird vocalizations

Code

ggplot(
  prop_pred[
    prop_pred$date %in%
      count_date$date[count_date$orig.sound.files >= 10] &
      prop_pred$pred_mimic != "Psarocolius_montezuma",
  ],
  aes(x = date.fact, y = prop, fill = pred_mimic)
) +
  geom_bar(stat = "identity") +
  scale_fill_viridis_d(alpha = 0.7, end = 0.8, option = "G") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5))

3.1 Regression model

Code

# weakly informative priors
priors <- c(
# Weakly informative logistic intercept
prior(normal(0, 2.5), class = "Intercept"),
# Temporal slope
prior(normal(0, 1), class = "b"),
# Random effect standard deviations
prior(exponential(1), class = "sd")
)

dat <- prop_pred[prop_pred$pred_mimic != "Psarocolius_montezuma",]

dat$julian_day <- as.numeric(format(as.Date(dat$date), "%j"))

fit <- brm(
  orig.sound.files | trials(total) ~ 
    scale(julian_day) +
    (1 + scale(julian_day) | pred_mimic),
  family = binomial,
  data = dat,
  prior = priors,
  chains = 4,
  cores = 4,
  iter = 4000,
  control = list(adapt_delta = 0.95), file_refit = "on_change",
  file = "data/processed/brms_model_prop_pred_mimicry.rds"
)

null_priors <- c(
# Weakly informative logistic intercept
prior(normal(0, 2.5), class = "Intercept"),

# Random effect standard deviations
prior(exponential(1), class = "sd")
)

null_fit <- brm(
  orig.sound.files | trials(total) ~ 
    1 +
    (1 | pred_mimic),
  family = binomial,
  data = dat,
  prior = null_priors,
  chains = 4,
  cores = 4,
  iter = 4000,
  control = list(adapt_delta = 0.95),
  file_refit = "on_change",
  file = "data/processed/brms_null_model_prop_pred_mimicry.rds"
)

Code

fit <- readRDS("data/processed/brms_model_prop_pred_mimicry.rds")

# Extract posterior draws as base R data.frame
post <- as.data.frame(fit)

# Get species levels
species_levels <- unique(fit$data$pred_mimic)

# Global slope
beta_global <- post[, "b_scalejulian_day"]
# Find all random slope columns automatically
rand_cols <- grep("^r_pred_mimic\\[.*,scalejulian_day\\]$",
names(post),
value = TRUE)
# Extract species names from column names
species_levels <- sub("r_pred_mimic\\[(.*),scalejulian_day\\]", "\\1", rand_cols)


# Storage matrix
results <- matrix(NA, nrow = length(species_levels), ncol = 3)
colnames(results) <- c("mean_slope",
                       "l95",
                       "u95")

rownames(results) <- species_levels


# Loop through species
for (i in seq_along(rand_cols)) {

  slope_draws <- beta_global + post[, rand_cols[i]]

  results[i, "mean_slope"]    <- mean(slope_draws)
  results[i, "l95"]           <- quantile(slope_draws, 0.025)
  results[i, "u95"]           <- quantile(slope_draws, 0.975)
}

# Print results
print(round(results, 4))

                         mean_slope     l95     u95
babbling                    -0.0700 -0.2415  0.0987
Procnias_tricarunculatus     1.1138  0.8618  1.3894
Pteroglossus_torquatus       0.1652  0.0068  0.3295
Ramphastos_sulfuratus       -0.3854 -0.5018 -0.2693

Code

dat <- prop_pred[prop_pred$pred_mimic != "Psarocolius_montezuma",]

dat$julian_day <- as.numeric(format(as.Date(dat$date), "%j"))


# Create prediction grid
newdata <- expand.grid(
  julian_day = seq(min(dat$julian_day),
                   max(dat$julian_day),
                   length.out = 100),
  pred_mimic = unique(dat$pred_mimic)
)

# IMPORTANT: match scaling used in model
newdata$scalejulian_day <- scale(dat$julian_day)[1]  # dummy to get attributes
newdata$scalejulian_day <- (newdata$julian_day - mean(dat$julian_day)) /
                           sd(dat$julian_day)

newdata$total <- round(mean(dat$total), 0)  # use mean total for predictions

# Get posterior expected probabilities
epred <- posterior_epred(
  fit,
  newdata = newdata,
  re_formula = NULL
)

# Posterior mean prediction
newdata$fit <- colMeans(epred)

# Convert julian day back to date for plotting
origin_year <- format(as.Date(dat$date[1]), "%Y")
newdata$date <- as.Date(newdata$julian_day - 1,
                        origin = paste0(origin_year, "-01-01"))

newdata$fit_prop <- newdata$fit / 100

# include only those for which CI did not include 0
results_df <- as.data.frame(results)
sig_spp <- rownames(results_df)[results_df$l95 * results_df$u95 > 0]

newdata <- newdata[newdata$pred_mimic %in% sig_spp, ]

# Plot
ggplot(
  dat[dat$prop > 0, ],
  aes(
    x = as.Date(date),
    y = prop,
    fill = pred_mimic,
    col = pred_mimic,
    size = orig.sound.files
  )
) +
  geom_point(pch = 21) +
  geom_line(
    data = newdata,
    aes(x = date, y = fit_prop, color = pred_mimic),
    size = 1.2
  ) +
  scale_fill_viridis_d(alpha = 0.6, end = 0.8) +
  scale_color_viridis_d(alpha = 0.8, end = 0.8) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) +
  labs(
    x = "Date",
    y = "Proportion of vocalizations",
    fill = "Predicted species",
    color = "Predicted species",
    size = "Number of recorded\nvocalizations"
  )

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Takeaways

Session information

─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.5.2 (2025-10-31)
 os       Ubuntu 22.04.4 LTS
 system   x86_64, linux-gnu
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Costa_Rica
 date     2026-03-05
 pandoc   3.6.3 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
 quarto   1.8.25 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/quarto

─ Packages ───────────────────────────────────────────────────────────────────
 package        * version    date (UTC) lib source
 abind            1.4-8      2024-09-12 [1] CRAN (R 4.5.2)
 askpass          1.2.1      2024-10-04 [1] CRAN (R 4.5.2)
 backports        1.5.0      2024-05-23 [1] CRAN (R 4.5.2)
 bayesplot        1.15.0     2025-12-12 [1] CRAN (R 4.5.2)
 bridgesampling   1.2-1      2025-11-19 [1] CRAN (R 4.5.2)
 brms           * 2.23.0     2025-09-09 [1] CRAN (R 4.5.2)
 Brobdingnag      1.2-9      2022-10-19 [1] CRAN (R 4.5.2)
 cachem           1.1.0      2024-05-16 [1] CRAN (R 4.5.2)
 caret          * 7.0-1      2024-12-10 [1] CRAN (R 4.5.2)
 checkmate        2.3.4      2026-02-03 [1] CRAN (R 4.5.2)
 class            7.3-23     2025-01-01 [1] CRAN (R 4.5.2)
 cli              3.6.5      2025-04-23 [1] CRAN (R 4.5.2)
 coda             0.19-4.1   2024-01-31 [1] CRAN (R 4.5.2)
 codetools        0.2-20     2024-03-31 [1] CRAN (R 4.5.2)
 cowplot        * 1.2.0      2025-07-07 [1] CRAN (R 4.5.2)
 crayon           1.5.3      2024-06-20 [1] CRAN (R 4.5.2)
 data.table       1.18.2.1   2026-01-27 [1] CRAN (R 4.5.2)
 dbscan           1.2.4      2025-12-19 [1] CRAN (R 4.5.2)
 devtools         2.4.6      2025-10-03 [1] CRAN (R 4.5.2)
 digest           0.6.39     2025-11-19 [1] CRAN (R 4.5.2)
 distributional   0.6.0      2026-01-14 [1] CRAN (R 4.5.2)
 doParallel     * 1.0.17     2022-02-07 [1] CRAN (R 4.5.2)
 dplyr            1.2.0      2026-02-03 [1] CRAN (R 4.5.2)
 e1071            1.7-17     2025-12-18 [1] CRAN (R 4.5.2)
 ellipsis         0.3.2      2021-04-29 [3] CRAN (R 4.1.1)
 evaluate         1.0.5      2025-08-27 [1] CRAN (R 4.5.2)
 farver           2.1.2      2024-05-13 [1] CRAN (R 4.5.2)
 fastmap          1.2.0      2024-05-15 [1] CRAN (R 4.5.2)
 FNN              1.1.4.1    2024-09-22 [1] CRAN (R 4.5.2)
 foreach        * 1.5.2      2022-02-02 [3] CRAN (R 4.1.2)
 fs               1.6.6      2025-04-12 [1] CRAN (R 4.5.2)
 future           1.69.0     2026-01-16 [1] CRAN (R 4.5.2)
 future.apply     1.20.2     2026-02-20 [1] CRAN (R 4.5.2)
 generics         0.1.4      2025-05-09 [1] CRAN (R 4.5.2)
 ggplot2        * 4.0.2      2026-02-03 [1] CRAN (R 4.5.2)
 globals          0.19.0     2026-02-02 [1] CRAN (R 4.5.2)
 glue             1.8.0      2024-09-30 [1] CRAN (R 4.5.2)
 gower            1.0.2      2024-12-17 [1] CRAN (R 4.5.2)
 gridExtra        2.3        2017-09-09 [3] CRAN (R 4.0.1)
 gtable           0.3.6      2024-10-25 [1] CRAN (R 4.5.2)
 hardhat          1.4.2      2025-08-20 [1] CRAN (R 4.5.2)
 htmltools        0.5.9      2025-12-04 [1] CRAN (R 4.5.2)
 htmlwidgets      1.6.4      2023-12-06 [1] CRAN (R 4.5.2)
 igraph           2.2.2      2026-02-12 [1] CRAN (R 4.5.2)
 inline           0.3.21     2025-01-09 [1] CRAN (R 4.5.2)
 ipred            0.9-15     2024-07-18 [1] CRAN (R 4.5.2)
 iterators      * 1.0.14     2022-02-05 [3] CRAN (R 4.1.2)
 jsonlite         2.0.0      2025-03-27 [1] CRAN (R 4.5.2)
 knitr          * 1.51       2025-12-20 [1] CRAN (R 4.5.2)
 labeling         0.4.3      2023-08-29 [1] CRAN (R 4.5.2)
 lattice        * 0.22-9     2026-02-09 [1] CRAN (R 4.5.2)
 lava             1.8.2      2025-10-30 [1] CRAN (R 4.5.2)
 lifecycle        1.0.5      2026-01-08 [1] CRAN (R 4.5.2)
 listenv          0.10.0     2025-11-02 [1] CRAN (R 4.5.2)
 loo              2.9.0      2025-12-23 [1] CRAN (R 4.5.2)
 lubridate        1.9.5      2026-02-04 [1] CRAN (R 4.5.2)
 magrittr         2.0.4      2025-09-12 [1] CRAN (R 4.5.2)
 MASS             7.3-65     2025-02-28 [1] CRAN (R 4.5.2)
 Matrix           1.7-4      2025-08-28 [1] CRAN (R 4.5.2)
 matrixStats      1.5.0      2025-01-07 [1] CRAN (R 4.5.2)
 memoise          2.0.1      2021-11-26 [3] CRAN (R 4.1.2)
 ModelMetrics     1.2.2.2    2020-03-17 [3] CRAN (R 4.0.1)
 mvtnorm          1.3-3      2025-01-10 [1] CRAN (R 4.5.2)
 nlme             3.1-168    2025-03-31 [1] CRAN (R 4.5.2)
 nnet             7.3-20     2025-01-01 [1] CRAN (R 4.5.2)
 openssl          2.3.5      2026-02-26 [1] CRAN (R 4.5.2)
 otel             0.2.0      2025-08-29 [1] CRAN (R 4.5.2)
 packrat          0.9.3      2025-06-16 [1] CRAN (R 4.5.2)
 parallelly       1.46.1     2026-01-08 [1] CRAN (R 4.5.2)
 pillar           1.11.1     2025-09-17 [1] CRAN (R 4.5.2)
 pkgbuild         1.4.8      2025-05-26 [1] CRAN (R 4.5.2)
 pkgconfig        2.0.3      2019-09-22 [3] CRAN (R 4.0.1)
 pkgload          1.5.0      2026-02-03 [1] CRAN (R 4.5.2)
 plyr             1.8.9      2023-10-02 [1] CRAN (R 4.5.2)
 png              0.1-8      2022-11-29 [1] CRAN (R 4.5.2)
 posterior        1.6.1      2025-02-27 [1] CRAN (R 4.5.2)
 pROC           * 1.19.0.1   2025-07-31 [1] CRAN (R 4.5.2)
 prodlim          2025.04.28 2025-04-28 [1] CRAN (R 4.5.2)
 proxy            0.4-29     2025-12-29 [1] CRAN (R 4.5.2)
 purrr            1.2.1      2026-01-09 [1] CRAN (R 4.5.2)
 QuickJSR         1.9.0      2026-01-25 [1] CRAN (R 4.5.2)
 R6               2.6.1      2025-02-15 [1] CRAN (R 4.5.2)
 RColorBrewer     1.1-3      2022-04-03 [1] CRAN (R 4.5.2)
 Rcpp           * 1.1.1      2026-01-10 [1] CRAN (R 4.5.2)
 RcppParallel     5.1.11-2   2026-03-05 [1] CRAN (R 4.5.2)
 recipes          1.3.1      2025-05-21 [1] CRAN (R 4.5.2)
 remotes          2.5.0      2024-03-17 [1] CRAN (R 4.5.2)
 reshape2       * 1.4.5      2025-11-12 [1] CRAN (R 4.5.2)
 reticulate       1.45.0     2026-02-13 [1] CRAN (R 4.5.2)
 rlang            1.1.7      2026-01-09 [1] CRAN (R 4.5.2)
 rmarkdown        2.30       2025-09-28 [1] CRAN (R 4.5.2)
 rpart            4.1.24     2025-01-07 [1] CRAN (R 4.5.2)
 RSpectra         0.16-2     2024-07-18 [1] CRAN (R 4.5.2)
 rstan            2.32.7     2025-03-10 [1] CRAN (R 4.5.2)
 rstantools       2.6.0      2026-01-10 [1] CRAN (R 4.5.2)
 rstudioapi       0.18.0     2026-01-16 [1] CRAN (R 4.5.2)
 Rtsne          * 0.17       2023-12-07 [1] CRAN (R 4.5.2)
 S7               0.2.1      2025-11-14 [1] CRAN (R 4.5.2)
 scales           1.4.0      2025-04-24 [1] CRAN (R 4.5.2)
 sessioninfo      1.2.3      2025-02-05 [1] CRAN (R 4.5.2)
 sketchy          1.0.7      2026-03-03 [1] CRANs (R 4.5.2)
 smotefamily    * 1.4.0      2024-03-14 [1] CRAN (R 4.5.2)
 StanHeaders      2.32.10    2024-07-15 [1] CRAN (R 4.5.2)
 stringi          1.8.7      2025-03-27 [1] CRAN (R 4.5.2)
 stringr          1.6.0      2025-11-04 [1] CRAN (R 4.5.2)
 survival         3.8-6      2026-01-16 [1] CRAN (R 4.5.2)
 tensorA          0.36.2.1   2023-12-13 [1] CRAN (R 4.5.2)
 tibble           3.3.1      2026-01-11 [1] CRAN (R 4.5.2)
 tidyselect       1.2.1      2024-03-11 [1] CRAN (R 4.5.2)
 timechange       0.4.0      2026-01-29 [1] CRAN (R 4.5.2)
 timeDate         4052.112   2026-01-28 [1] CRAN (R 4.5.2)
 umap           * 0.2.10.0   2023-02-01 [1] CRAN (R 4.5.2)
 usethis          3.2.1      2025-09-06 [1] CRAN (R 4.5.2)
 vctrs            0.7.1      2026-01-23 [1] CRAN (R 4.5.2)
 viridis        * 0.6.5      2024-01-29 [1] CRAN (R 4.5.2)
 viridisLite    * 0.4.3      2026-02-04 [1] CRAN (R 4.5.2)
 withr            3.0.2      2024-10-28 [1] CRAN (R 4.5.2)
 xaringanExtra    0.8.0      2024-05-19 [1] CRAN (R 4.5.2)
 xfun             0.56       2026-01-18 [1] CRAN (R 4.5.2)
 xgboost        * 3.2.0.1    2026-02-10 [1] CRAN (R 4.5.2)
 yaml             2.3.12     2025-12-10 [1] CRAN (R 4.5.2)

 [1] /home/m/R/x86_64-pc-linux-gnu-library/4.5
 [2] /usr/local/lib/R/site-library
 [3] /usr/lib/R/site-library
 [4] /usr/lib/R/library
 * ── Packages attached to the search path.

──────────────────────────────────────────────────────────────────────────────