Statistical analysis

Bellbird mimicry

Author
Published

June 11, 2026

Code
# options to customize chunk outputs
knitr::opts_chunk$set(
  message = FALSE
 )

Purpose

  • Evaluate similarity of vocalizations of a mimic bellbird and its model species using birdNET embeddings
    • Are focal belbird vocalizations more similar to sympatric than to allopatric species?
    • Are mimic vocalizations (resembling sympatric species) more common in the focal bellbird than in other bellbirds?

Analysis flowchart

flowchart LR
  A[Read birdNET output] --> B(Format data) 
  B --> C(Graphs)
  C --> D(Statistical analysis)
  D --> E(Model summary) 

style A fill:#382A5433
style B fill:#395D9C33
style C fill:#3497A933
style D fill:#60CEAC33

Load packages and custom functions

Code
# knitr is require for creating html/pdf/word reports
# formatR is used for soft-wrapping code

# install knitr package if not installed
if (!requireNamespace("sketchy", quietly = TRUE)) {
  install.packages("sketchy")
}


packages <- c(
  "knitr",
  "Rtsne",
  "caret",
  "ggplot2",
  "viridis",
  "doParallel",
  "brms",
  "brmsish",
  "cowplot",
  "Rraven",
  "kableExtra"
)

# install/ load packages
sketchy::load_packages(packages = packages)


options(knitr.kable.NA = '')

print <- function(x, row.names = FALSE) {
    kb <- kable(x, row.names = row.names, digits = 4, "html")
    kb <- kable_styling(kb,
                        bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    scroll_box(kb, width = "100%")
}

common_names <- c(
  "Procnias tricarunculatus" = "Three-wattled Bellbird",
  "Psarocolius guatimozinus" = "Chestnut-headed Oropendola",
  "Psarocolius montezuma" = "Montezuma Oropendola",
  "Pteroglossus erythropygius" = "Pale-mandibled Aracari",
  "Pteroglossus torquatus" = "Collared Aracari",
  "Ramphastos brevis" = "Choco Toucan",
  "Ramphastos sulfuratus" = "Keel-billed Toucan",
  "nocall" = "Unassigned"
)

# to create several posterior predictive check plots out of a brms fit
custom_ppc <- function(fit, group = NULL, ndraws = 30) {
  by_group  <- if (!is.null(group)) {
    TRUE
  } else
    FALSE
  
  if (by_group)
    by_group  <-  if (any(names(fit$data) == group)) {
      TRUE
    } else
      FALSE
  
  if (by_group)
    by_group <-
      if (is.character(fit$data[, group]) |
          is.factor(fit$data[, group])) {
        TRUE
      } else
        FALSE
  
  
  if (by_group) {
    ppc_dens <- pp_check(fit,
                         ndraws = ndraws,
                         type = 'dens_overlay_grouped',
                         group = group)
    
    pp_mean <- pp_check(
      fit,
      type = "stat_grouped",
      stat = "mean",
      group = group,
      ndraws = ndraws
    )  + theme_classic()
    
  } else {
    ppc_dens <- pp_check(fit, ndraws = ndraws, type = 'dens_overlay')
    
    pp_mean <- pp_check(fit,
                        type = "stat",
                        stat = "mean",
                        ndraws = ndraws) + theme_classic()
    
  }
  
  pp_plot_list <-
    list(ppc_dens, pp_mean)
  
  pp_plot_list <-
    lapply(pp_plot_list, function(x)
      x   + scale_fill_viridis_d(
        begin = 0.3,
        end = 0.8,
        alpha = 0.5,
        option = "mako"
      ) + theme_classic())
  
  
  ppc_plot <- plot_grid(plotlist = pp_plot_list, ncol = 2)
 
   ppc_plot
}

1 Sympatric and allopatric species

1.1 BirdNET model

Code
mod_eval_symp_allo <- read.csv("./data/processed/birdnet_models/sympatric_and_allopatric/sympatric_and_allopatric_evaluation.csv")

names(mod_eval_symp_allo) <- gsub("..", " ", names(mod_eval_symp_allo), fixed = TRUE)

print(mod_eval_symp_allo)
Class Precision 0.5. Recall 0.5. F1.Score 0.5. Precision opt. Recall opt. F1.Score opt. AUPRC AUROC Optimal.Threshold True.Positives False.Positives True.Negatives False.Negatives Samples Percentage
OVERALL (Macro-avg) 0.8908 0.9627 0.9232 0.9180 0.9504 0.9312 0.9630 0.9968
Procnias tricarunculatus 0.9712 0.9854 0.9783 1.0000 0.9708 0.9852 0.9990 0.9998 0.80 133 0 884 4 137 13.42
Psarocolius guatimozinus 0.8909 0.9608 0.9245 0.9423 0.9608 0.9515 0.9867 0.9992 0.55 49 3 967 2 51 5.00
Psarocolius montezuma 0.9014 0.9846 0.9412 0.9538 0.9538 0.9538 0.9873 0.9991 0.75 62 3 953 3 65 6.37
Pteroglossus erythropygius 0.8943 0.9486 0.9206 0.9336 0.9206 0.9271 0.9844 0.9959 0.70 197 14 793 17 214 20.96
Pteroglossus torquatus 0.6216 0.8679 0.7244 0.6216 0.8679 0.7244 0.7844 0.9842 0.50 46 28 940 7 53 5.19
Ramphastos brevis 0.9840 0.9946 0.9892 0.9840 0.9946 0.9892 0.9995 0.9999 0.50 184 3 833 1 185 18.12
Ramphastos sulfuratus 0.9722 0.9968 0.9844 0.9904 0.9842 0.9873 0.9994 0.9997 0.80 311 3 702 5 316 30.95

Keeping low confidence classifications (confidence < optimal threshold)

Code
# read data
sympatric_allopatric_class <- imp_raven(
  path = "./data/processed/birdnet_classification_outputs/sympatrics_and_allopactrics/sympatrics_and_allopatrics_model/",
  all.data = TRUE,
  warbler.format = TRUE,
  pb = FALSE
)

# keep only test data
test_files <- list.files("/home/m/Dropbox/Projects/bellbird_mimicry/data/processed/birdnet_data/test_data/", full.names = FALSE, recursive = TRUE)

test_files <- basename(gsub("\\\\", "/", test_files))


sympatric_allopatric_class$sound.files <- basename(gsub("\\\\", "/", sympatric_allopatric_class$sound.files))

# all(test_files %in% sympatric_allopatric_class$sound.files)
# 
# setdiff(test_files, sympatric_allopatric_class$sound.files)
 
 
sympatric_allopatric_test <- sympatric_allopatric_class[sympatric_allopatric_class$sound.files %in% test_files, ] 

# filter low confidence classifications
sympatric_allopatric_test$true.positive <- sapply(seq_len(nrow(sympatric_allopatric_test)), function(x){
    sympatric_allopatric_test$Confidence[x] >= mod_eval_symp_allo$Optimal.Threshold[mod_eval_symp_allo$Class == sympatric_allopatric_test$`Common Name`[x]]
    })

sympatric_allopatric_test$`Common Name`[!sympatric_allopatric_test$true.positive] <- "nocall"


# rename column
sympatric_allopatric_test$predicted_sp <- sympatric_allopatric_test$`Common Name`

sympatric_allopatric_test$`Common Name` <- NULL

# read metadata
est <- readRDS(
  "./data/processed/extended_selection_table_sympatric_and_allopatric_species.RDS"
)

# add metadata
sympatric_allopatric_test$sound.files <- basename(gsub("\\\\", "/", sympatric_allopatric_test$sound.files))



sympatric_allopatric_test$vocalizing_sp <- sapply(sympatric_allopatric_test$sound.files, function(x) {
  est$source[paste0(est$sound.files, ".wav") == x][1]
})


# save confusion matrix
conf_mat <- confusionMatrix(
  data = as.factor(sympatric_allopatric_test$predicted_sp),
  reference = factor(sympatric_allopatric_test$vocalizing_sp, levels = levels(as.factor(sympatric_allopatric_test$predicted_sp))
)
)

conf_df_fp <- as.data.frame(conf_mat$table)

conf_df_fp$total <- sapply(conf_df_fp$Reference, function(x) {
  sum(sympatric_allopatric_test$vocalizing_sp == x)
})

conf_df_fp$Proportion <- conf_df_fp$Freq / conf_df_fp$total

conf_df_fp$pred_common_name <- common_names[as.character(conf_df_fp$Prediction)]
conf_df_fp$ref_common_name <- common_names[as.character(conf_df_fp$Reference)]

# sort levels by genus
conf_df_fp$pred_common_name <- factor(
  conf_df_fp$pred_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Chestnut-headed Oropendola",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Pale-mandibled Aracari",
    "Keel-billed Toucan",
    "Choco Toucan",
    "Unassigned"
  )
)

conf_df_fp$ref_common_name <- factor(
  conf_df_fp$ref_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Chestnut-headed Oropendola",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Pale-mandibled Aracari",
    "Keel-billed Toucan",
    "Choco Toucan",
    "Unassigned"
  )
)

ggplot(conf_df_fp[conf_df_fp$ref_common_name != "Unassigned", ],
       aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
    geom_tile() +
    theme_bw() +
    coord_equal() +
    scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
    labs(x = "Reference species", y = "Predicted species") +
    geom_text(aes(label = round(Proportion, 2)), color = "black") +
    theme_classic() +
    theme(axis.text.x = element_text(
        color = "black",
        size = 11,
        angle = 30,
        hjust = 1
    ))

Excluding low confidence classifications (confidence >= optimal threshold)

Code
sympatric_allopatric_test_tp <- sympatric_allopatric_test[sympatric_allopatric_test$true.positive, ]

# save confusion matrix
conf_mat <- confusionMatrix(
  data = as.factor(sympatric_allopatric_test_tp$predicted_sp),
  reference = factor(sympatric_allopatric_test_tp$vocalizing_sp, levels = levels(as.factor(sympatric_allopatric_test_tp$predicted_sp))
)
)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <- sapply(conf_df$Reference, function(x) {
  sum(sympatric_allopatric_test_tp$vocalizing_sp == x)
})

conf_df$Proportion <- conf_df$Freq / conf_df$total

conf_df$pred_common_name <- common_names[as.character(conf_df$Prediction)]
conf_df$ref_common_name <- common_names[as.character(conf_df$Reference)]

# sort levels by genus
conf_df$pred_common_name <- factor(
  conf_df$pred_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Chestnut-headed Oropendola",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Pale-mandibled Aracari",
    "Keel-billed Toucan",
    "Choco Toucan",
    "Unassigned"
  )
)

conf_df$ref_common_name <- factor(
  conf_df$ref_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Chestnut-headed Oropendola",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Pale-mandibled Aracari",
    "Keel-billed Toucan",
    "Choco Toucan",
    "Unassigned"
  )
)

ggplot(conf_df, aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
  geom_tile() +
  theme_bw() +
  coord_equal() +
  scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
  labs(x = "Reference species", y = "Predicted species") +
  geom_text(aes(label = round(Proportion, 2)), color = "black") +
  theme_classic() +
  theme(
    axis.text.x = element_text(
      color = "black",
      size = 11,
      angle = 30,
      hjust = 1
    )
  )

1.1.1 Embeddings

Code
embs <- read.csv("./data/processed/embeddings/embeddings_sympatric_allopatric_spp.csv")

# Split into separate columns
emb_matrix <- do.call(rbind, strsplit(embs$embedding, ","))
colnames(emb_matrix) <- paste0("emb", 1:ncol(emb_matrix))
emb_matrix <- apply(emb_matrix, 2, as.numeric)

# Combine with the first 3 columns
embs <- as.data.frame(cbind(embs[, 1:3], emb_matrix))

# add metadata
embs$file_path <- gsub("Nueva-grabaci\xf3n", "Nueva-grabación", embs$file_path, useBytes = TRUE)
embs$sound.files <- basename(gsub("\\\\", "/", embs$file_path, useBytes = TRUE))

embs$vocalizing_sp <- sapply(embs$sound.files, function(x) {
  est$source[paste0(est$sound.files, ".wav") == x][1]
})

set.seed(123)
tsne_out <- Rtsne(as.matrix(embs[, grep("emb", names(embs))]))

tsne_all <- data.frame(vocalizing_sp = embs$vocalizing_sp, tsne1 = tsne_out$Y[,1], tsne2 = tsne_out$Y[,2])

saveRDS(tsne_all, "./data/processed/tsne_dims_sympatric_and_allopatric_species.rds")
Code
# colors and shapes by species for scatterplots
cols <- c(adjustcolor("tomato", alpha.f = 0.4), mako(4, alpha = 0.4, begin = 0.2, end = 0.8))
cols2 <- c(adjustcolor("tomato", alpha.f = 0.9), mako(4, alpha = 0.9, begin = 0.2, end = 0.8))

cols <- c(cols[1:2], rep(cols[3:5], each = 2))
cols2 <- c(cols2[1:2], rep(cols2[3:5], each = 2))

shapes <- c(16, 16, 16, 1, 17, 2, 15, 0)

names(shapes) <- names(cols) <- names(cols2) <- c(
  "mimic_bellbird",
  "Three-wattled Bellbird",
  "Montezuma Oropendola",
  "Chestnut-headed Oropendola",
  "Collared Aracari",
  "Pale-mandibled Aracari",
  "Keel-billed Toucan",
  "Choco Toucan"
)

tsne_all <- readRDS("./data/processed/tsne_dims_sympatric_and_allopatric_species.rds")

tsne_only_model_species <- tsne_all[tsne_all$vocalizing_sp != "mimic_bellbird", ]

tsne_only_model_species$common_name <- common_names[as.character(tsne_only_model_species$vocalizing_sp)] 

tsne_only_model_species$common_name <- factor(
  tsne_only_model_species$common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Chestnut-headed Oropendola",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Pale-mandibled Aracari",
    "Keel-billed Toucan",
    "Choco Toucan"
  )
)

ggplot(tsne_only_model_species, aes(x = tsne1, y = tsne2, color = common_name, shape = common_name)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
    scale_color_manual(values = cols[names(cols) %in% unique(tsne_only_model_species$common_name)]) +
    scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_only_model_species$common_name)]) +
    theme_classic()

1.1.2 Focal bellbird classification

Code
# read data
mimic_class <- imp_raven(
  path = "./data/processed/birdnet_classification_outputs/mimic_bellbird/sympatrics_and_allopatrics_model/",
  all.data = TRUE,
  warbler.format = TRUE,
  pb = FALSE
)

# filter low confidence classifications
mimic_class$true.positive <- sapply(seq_len(nrow(mimic_class)), function(x){
    mimic_class$Confidence[x] >= mod_eval_symp_allo$Optimal.Threshold[mod_eval_symp_allo$Class == mimic_class$`Common Name`[x]]
    })

mimic_class$`Common Name`[!mimic_class$true.positive] <- "nocall"



# add metadata
mimic_class$sound.files <- basename(gsub(
  "\\\\",
  "/",
  mimic_class$sound.files
))

# rename column
mimic_class$predicted_sp <- mimic_class$`Common Name`
mimic_class$vocalizing_sp <- "Procnias tricarunculatus"

mimic_class$`Common Name` <- NULL

prop_mimic_class <- table(mimic_class$predicted_sp)

prop_mimic_class <- data.frame(prop_mimic_class)
names(prop_mimic_class) <- c("Prediction", "Freq")
prop_mimic_class$total <- sum(prop_mimic_class$Freq)
prop_mimic_class$Proportion <- prop_mimic_class$Freq / prop_mimic_class$total
prop_mimic_class$Reference <- "Focal\nbellbird"

others <- conf_df_fp[conf_df_fp$Reference == "Procnias tricarunculatus", ]
others$Reference <- "Other\nbellbirds"

prop_mimic_psarocolius <- prop_mimic_class[prop_mimic_class$Prediction == "Psarocolius montezuma",]

prop_mimic_psarocolius$Freq <- 0
prop_mimic_psarocolius$Proportion <- 0

prop_mimic_psarocolius$Prediction <- "Psarocolius guatimozinus" 


# proportions observed in adult bellbird
prop_class <- rbind(
  prop_mimic_class,
  prop_mimic_psarocolius,
  others[, c("Prediction", "Freq", "total", "Proportion", "Reference")]
)




prop_class$genus <- sapply(
  strsplit(as.character(prop_class$Prediction), " "),
  "[[",
  1
)

prop_class$type <- ifelse(
  grepl("guati|eryth|brevis", prop_class$Prediction),
  "Allopatric",
  "Sympatric"
)

prop_class$type[prop_class$genus == "Procnias"] <- "Own\nspecies"

prop_class$type <- factor(
  prop_class$type,
  levels = c("Allopatric", "Sympatric", "Own\nspecies")
)

prop_class$genus <- factor(
  prop_class$genus,
  levels = c("Procnias", "Pteroglossus", "Psarocolius", "Ramphastos")
)

prop_class$Reference <- factor(
  prop_class$Reference,
  levels = c("Other\nbellbirds", "Focal\nbellbird")
)


genus_labels <- c(
  "Procnias" = "Three-wattled bellbird",
  "Psarocolius" = "Oropendolas",
  "Pteroglossus" = "Aracaris",
  "Ramphastos" = "Toucans"
)

prop_class$common_name <- common_names[as.character(prop_class$Prediction)]

prop_class$common_name <- gsub("-", "\n", prop_class$common_name)


prop_class$sp_label <- sapply(
  strsplit(as.character(prop_class$common_name), " "),
  "[[",
  1
)

prop_class$genus[prop_class$sp_label == "Unassigned"] <- "Procnias"

prop_class$sp_label[prop_class$sp_label == "Three\nwattled" & prop_class$sp_label == "Unassigned"] <- ""


prop_class$sp_label_y <- ave(
  prop_class$Proportion,
  prop_class$genus,
  FUN = function(x) {
    x + log10(max(x) + 0.999) * 0.44
  }
)
Code
# settings

base_size <- 14

percent_text_size <- 4
species_text_size <- 4


# GENERA TO PLOT

loop_df <- data.frame(
  gen = c(
    "Procnias",
    "Pteroglossus",
    "Psarocolius",
    "Ramphastos"
  ),
  label = c("A", "B", "C", "D"),
  y_max = c(
  1.3,
  0.057,
  0.004,
  0.16
 )
)


prop_class <- prop_class[order(prop_class$genus, prop_class$Reference),]

prop_class$sp_label_y[prop_class$genus == "Procnias"] <- c(
   0.4, 1.03, 0.9, 0.4
) - 0.05


prop_class$sp_label_y[prop_class$genus == "Pteroglossus"] <- c(
    0.01, 0.01, 0.019, 0.047
) - 0.003

prop_class$sp_label_y[prop_class$genus == "Psarocolius"] <- c(
    0.0004, 0.0014, 0.00255, 0.0004
) + 0.0001
    

prop_class$sp_label_y[prop_class$genus == "Ramphastos"] <- c(
    0.026, 0.02, 0.02, 0.13
) + 0.006

bar_cols <- c("gray", mako(10, alpha = 0.6, begin = 0.2, end = 0.8)[c(2, 8)])

prop_class$prop_label <- paste(
  round(prop_class$Proportion * 100, 1),
  "%\n n = ",
 prop_class$Freq
)

# PANEL FUNCTION
make_panel <- function(gen, label, y_max){

  # subset data
  dat_sub <- subset(prop_class, genus == gen)

  if(gen == "Procnias"){

  dat_sub$type <- factor(
    dat_sub$type,
    levels = c(
        "Own\nspecies",
      "Allopatric",
      "Sympatric"
    )
  )

  }
  
  # exactly 3 tick marks
  y_breaks <- c(
    0,
    y_max / 2,
    y_max
  )

  
    if(gen == "Procnias"){

  fill_scale <- scale_fill_manual(
    name = "",
    values = c(
      "Own\nspecies" = "#28192FB3",
      "Allopatric" = "#28192FB3",     # not used
      "Sympatric" = "#DEF5E5B3"      # bellbird color
    ),
    breaks = "Sympatric"
  )

} else {

  fill_scale <- scale_fill_manual(
    name = "",
    values = c(
      "Own\nspecies" = bar_cols[1],
      "Allopatric" = bar_cols[2],
      "Sympatric" = bar_cols[3]
    ),
    breaks = c(
      "Allopatric",
      "Sympatric"
    )
  )

}
  
  # y-axis formatting
  if(gen == "Psarocolius"){

    y_scale <- scale_y_continuous(
      limits = c(0, y_max),
      breaks = y_breaks,
      labels = function(x) sprintf("%.1f%%", x * 100)
    )

  } else {

    y_scale <- scale_y_continuous(
      limits = c(0, y_max),
      breaks = y_breaks,
      labels = scales::percent
    )

  }

  ggplot(dat_sub, aes(x = Reference, y = Proportion)) +

    geom_bar(
      aes(
        group = interaction(Reference, type),
        fill = type
      ),
      position = position_dodge(),
      stat = "identity"
    ) +

    # percentages
    geom_text(
        lineheight = 0.7,
      aes(
        group = interaction(Reference, type),
        label = dat_sub$prop_label,
      ),
      position = position_dodge(width = 0.9),
      vjust = -0.1,
      size = percent_text_size
    ) +

    # species labels
    geom_text(
      aes(
        group = interaction(Reference, type),
        label = sp_label,
        y = sp_label_y
      ),
      position = position_dodge(width = 0.9),
      angle = 90,
      hjust = 0,
      vjust = 0.5,
      size = species_text_size
    ) +

    facet_wrap(
      ~ genus,
      labeller = labeller(genus = genus_labels)
    ) +

    y_scale +
      fill_scale +

    labs(
      title = label,
      x = NULL
    ) +

    theme_classic(base_size = base_size) +

    theme(

      # legend only in Pteroglossus panel
      legend.position = if(gen == "Pteroglossus"){
        c(0.3, 0.8)
      } else {
        "none"
      },

      legend.background = element_rect(
        fill = "white",
        color = NA
      ),

      axis.title.y = element_blank(),

      panel.grid.major.x = element_blank(),

      strip.background = element_rect(
        fill = "white",
        color = "black"
      ),

      strip.text = element_text(
        size = base_size
      ),

      axis.text.x = element_text(
        size = base_size
      ),

      axis.text.y = element_text(
        size = base_size
      ),

      legend.text = element_text(
        size = base_size
      ),

      plot.title = element_text(
        face = "bold",
        hjust = 0
      ),

      axis.line = element_line(
        linewidth = 0.8
      ),

      axis.ticks = element_line(
        linewidth = 0.8
      )
    )
}


# CREATE PANELS

plot_list <- lapply(
  seq_len(nrow(loop_df)),
  function(i){

    make_panel(
      gen = loop_df$gen[i],
      label = loop_df$label[i],
      y_max = loop_df$y_max[i]
    )

  }
)


# COMBINE PANELS

combined_plot <- plot_grid(
  plotlist = plot_list,
  ncol = 2,
  align = "hv"
)

# ADD SHARED Y-AXIS TITLE

# SHARED Y-AXIS LABEL

y_label <- ggdraw() +
  draw_label(
    "Percentage of vocalizations predicted as",
    angle = 90,
    size = base_size + 2
  )


# FINAL PLOT

final_plot <- plot_grid(
  y_label,
  combined_plot,
  ncol = 2,
  rel_widths = c(0.06, 1)
)


if (!isTRUE(getOption('knitr.in.progress'))) {
ggsave(
  filename = "./scripts/focal_vs_other_bellbirds_prediction.png",
  plot = final_plot,
  width = 10,
  height = 8,
  dpi = 300,
  device = grDevices::png
  ) 
final_plot    
}

Predictions from the sympatric + allopatric species model. Bars show the proportion of vocalizations from focal bellbirds and other bellbirds assigned by a bird-sound classifier to different taxonomic groups. (A) Vocalizations classified as three-wattled bellbirds (Procnias tricarunculatus) or left unassigned. (B–D) Vocalizations classified as aracaris, oropendolas, and toucans. Names above bars indicate the species-level classifications within each genus. Percentages above bars indicate the proportion of vocalizations assigned to each category.

1.1.3 Statistical analysis

Evaluate whether the proportion of vocalizations predicted as sympatric (model) species vs allopatric species is significantly higher in the focal bellbird than in other bellbirds

Code
mimic_other_bellbirds <- sympatric_allopatric_test[sympatric_allopatric_test$predicted_sp != "Procnias tricarunculatus", ]

mimic_focal <- mimic_class[mimic_class$predicted_sp != "Procnias tricarunculatus", ]

mimic_focal$vocalizing_type <- "focal"
mimic_other_bellbirds$vocalizing_type <- "others"

mimic_bellbirds <- rbind(
  mimic_focal[, c("predicted_sp", "vocalizing_type")],
  mimic_other_bellbirds[, c("predicted_sp", "vocalizing_type")]
)

mimic_bellbirds$vocalizing_type <- factor(
  mimic_bellbirds$vocalizing_type,
  levels = c("others", "focal")
)

mimic_bellbirds$mimic_type <- 
ifelse(
  grepl("guati|eryth|brevis", mimic_bellbirds$predicted_sp),
  0,
  1
)

mimic_bellbirds$genus <- sapply(
  strsplit(as.character(mimic_bellbirds$predicted_sp), " "),
  "[[",
  1
)

cores <- 4
chains <- 4
iter <- 4000

priors <- c(
  prior(normal(0, 1.5), class = "Intercept"),
  prior(normal(0, 0.7), class = "b"),
  prior(exponential(1), class = "sd")
)

only_prior_fit  <- brm(
  mimic_type ~ vocalizing_type + (1 | genus),
  data = mimic_bellbirds[mimic_bellbirds$predicted_sp != "nocall",],
  family = bernoulli(),
  prior = priors,
  sample_prior = "only",
  chains = chains,
  iter = iter,
  cores = cores,
          file = "./data/processed/mimicry_prob_by_simpatry_model_only_priors.RDS", 
  file_refit = "on_change"
)


mod1 <- brm(
  mimic_type ~ vocalizing_type  + (1 | genus),
  data = mimic_bellbirds[mimic_bellbirds$predicted_sp != "nocall",],
  family = bernoulli(),
  cores = cores,
    prior = priors,
        chains = chains,
        iter = iter,
        file = "./data/processed/mimicry_prob_by_simpatry_model.RDS", 
  file_refit = "on_change"
)

1.1.3.1 Prior check

Priors

Code
prior_summary(mod1)
prior class coef group resp dpar nlpar lb ub tag source
normal(0, 0.7) b user
b vocalizing_typefocal default
normal(0, 1.5) Intercept user
exponential(1) sd 0 user
sd genus default
sd Intercept genus default

Prior predictive checks

Code
custom_ppc(only_prior_fit, group = "vocalizing_type", ndraws = 100)

1.1.3.2 Fit summary

Code
fill_color <- mako(10)[8]

extended_summary(
  mod1,
  n.posterior = 1000,
  fill = fill_color,
  trace.palette = mako,
  remove.intercepts = FALSE,
  highlight = TRUE,
  print.name = FALSE
)
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 b-normal(0, 0.7) Intercept-normal(0, 1.5) sd-exponential(1) mimic_type ~ vocalizing_type + (1 | genus) 4000 4 1 2000 2 (0.00025%) 0 2382.72 3132.15 637637388
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_Intercept -0.713 -2.137 0.502 1.001 2382.72 3132.15
b_vocalizing_typefocal 3.153 2.781 3.528 1 4738.91 4324.30

Posterior predictive check

Code
custom_ppc(mod1, group = "vocalizing_type", ndraws = 100)

Code
post <- as_draws_df(mod1)

# Odds ratio: focal vs other birds
focal_vs_others <- exp(post$b_vocalizing_typefocal)

# Predicted probability of sympatric mimicry in focal birds
p_focal <- posterior_epred(
  mod1,
  newdata = data.frame(
    vocalizing_type = "focal",
    genus = unique(mimic_bellbirds$genus)[1]
  ),
  re_formula = NA
)

p_focal <- as.numeric(p_focal)

# Predicted probability of sympatric mimicry in other birds
p_others <- posterior_epred(
  mod1,
  newdata = data.frame(
    vocalizing_type = "others",
    genus = unique(mimic_bellbirds$genus)[1]
  ),
  re_formula = NA
)

p_others <- as.numeric(p_others)

# Within-group sympatric vs allopatric odds
focal_ratio <- p_focal / (1 - p_focal)
other_ratio <- p_others / (1 - p_others)

1.1.3.3 Summary

  • Focal bellbird vocalizations had 23.43 times greater odds [95% UI: 16.14–34.04] of being classified as the sympatric rather than the allopatric model species compared with vocalizations from other bellbirds (predicted probability of sympatric classification for focal birds = 0.92; for other bellbirds = 0.34).

  • Within focal bellbird vocalizations, the odds of classification as the sympatric model species were 11.84 times greater [95% UI: 2.77–40.39] than the odds of classification as closely related allopatric species (predicted probability of sympatric classification for focal birds = 0.92; predicted probability of allopatric classification for focal birds = 0.08).

  • For vocalizations from other bellbirds, the odds of classification as sympatric and allopatric species were similar (odds = 0.51 [95% UI: 0.12–1.65]; predicted probability of sympatric classification for other bellbirds = 0.34; predicted probability of allopatric classification for other bellbirds = 0.66).

2 Only sympatric species

2.1 BirdNET model

Code
mod_eval_symp <- read.csv("./data/processed/birdnet_models/sympatric/sympatric_evaluation.csv")

names(mod_eval_symp) <- gsub("..", " ", names(mod_eval_symp), fixed = TRUE)

print(mod_eval_symp)
Class Precision 0.5. Recall 0.5. F1.Score 0.5. Precision opt. Recall opt. F1.Score opt. AUPRC AUROC Optimal.Threshold True.Positives False.Positives True.Negatives False.Negatives Samples Percentage
OVERALL (Macro-avg) 0.9884 0.9908 0.9896 0.9939 0.9890 0.9914 0.9994 0.9998
Procnias tricarunculatus 0.9783 0.9854 0.9818 1.0000 0.9781 0.9889 0.9994 0.9998 0.60 134 0 434 3 137 23.99
Psarocolius montezuma 0.9848 1.0000 0.9924 0.9848 1.0000 0.9924 0.9985 0.9998 0.15 65 1 505 0 65 11.38
Pteroglossus torquatus 1.0000 0.9811 0.9905 1.0000 0.9811 0.9905 0.9997 1.0000 0.15 52 0 518 1 53 9.28
Ramphastos sulfuratus 0.9906 0.9968 0.9937 0.9906 0.9968 0.9937 0.9998 0.9998 0.50 315 3 252 1 316 55.34

Keeping low confidence classifications (confidence < optimal threshold)

Code
# read data
sympatric_class <- imp_raven(
  path = "./data/processed/birdnet_classification_outputs/sympatrics/sympatrics_model/",
  all.data = TRUE,
  warbler.format = TRUE,
  pb = FALSE
)


# keep only test data
test_files <- list.files("/home/m/Dropbox/Projects/bellbird_mimicry/data/processed/birdnet_data/test_data/", full.names = FALSE, recursive = TRUE)

test_files <- basename(gsub("\\\\", "/", test_files))


sympatric_class$sound.files <- basename(gsub("\\\\", "/", sympatric_class$sound.files))

# all(sympatric_class$sound.files %in% test_files)

# setdiff(test_files, sympatric_class$sound.files)

# setdiff(sympatric_class$sound.files, test_files)

sympatric_class_test <- sympatric_class[sympatric_class$sound.files %in% test_files, ] 


sympatric_class_test$true.positive <- sapply(seq_len(nrow(sympatric_class_test)), function(x){
    sympatric_class_test$Confidence[x] >= mod_eval_symp$Optimal.Threshold[mod_eval_symp$Class == sympatric_class_test$`Common Name`[x]]
    })
    
sympatric_class_test$`Common Name`[!sympatric_class_test$true.positive] <- "nocall"


# rename column
sympatric_class_test$predicted_sp <- sympatric_class_test$`Common Name`

sympatric_class_test$`Common Name` <- NULL

# read metadata
est <- readRDS(
  "./data/processed/extended_selection_table_sympatric_and_allopatric_species.RDS"
)

# add metadata
sympatric_class_test$sound.files <- basename(gsub("\\\\", "/", sympatric_class_test$sound.files))


sympatric_class_test$vocalizing_sp <- sapply(sympatric_class_test$sound.files, function(x) {
  est$source[paste0(est$sound.files, ".wav") == x][1]
})


sympatric_class_test <- sympatric_class_test[sympatric_class_test$vocalizing_sp %in% unique(sympatric_class_test$predicted_sp),]

# save confusion matrix
conf_mat <- confusionMatrix(
  data = as.factor(sympatric_class_test$predicted_sp),
  reference = factor(sympatric_class_test$vocalizing_sp, levels = levels(as.factor(sympatric_class_test$predicted_sp)))
)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <- sapply(conf_df$Reference, function(x) {
  sum(sympatric_class_test$vocalizing_sp == x)
})

conf_df$Proportion <- conf_df$Freq / conf_df$total

conf_df$pred_common_name <- common_names[as.character(conf_df$Prediction)]
conf_df$ref_common_name <- common_names[as.character(conf_df$Reference)]

# sort levels by genus
conf_df$pred_common_name <- factor(
  conf_df$pred_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Keel-billed Toucan",
    "Unassigned"
  )
)

conf_df$ref_common_name <- factor(
  conf_df$ref_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Keel-billed Toucan",
    "Unassigned"
  )
)

ggplot(conf_df[conf_df$ref_common_name != "Unassigned",], aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
  geom_tile() +
  theme_bw() +
  coord_equal() +
  scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
  labs(x = "Reference species", y = "Predicted species") +
  geom_text(aes(label = round(Proportion, 2)), color = "black") +
  theme_classic() +
  theme(
    axis.text.x = element_text(
      color = "black",
      size = 11,
      angle = 30,
      hjust = 1
    )
  )

Excluding low confidence classifications (confidence >= optimal threshold)

Code
# save confusion matrix
conf_mat <- confusionMatrix(
  data = as.factor(sympatric_class_test$predicted_sp[sympatric_class_test$true.positive]),
  reference = factor(sympatric_class_test$vocalizing_sp[sympatric_class_test$true.positive], levels = levels(as.factor(sympatric_class_test$predicted_sp[sympatric_class_test$true.positive])))
)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <- sapply(conf_df$Reference, function(x) {
  sum(sympatric_class_test$vocalizing_sp[sympatric_class_test$true.positive] == x)
})

conf_df$Proportion <- conf_df$Freq / conf_df$total

conf_df$pred_common_name <- common_names[as.character(conf_df$Prediction)]
conf_df$ref_common_name <- common_names[as.character(conf_df$Reference)]

# sort levels by genus
conf_df$pred_common_name <- factor(
  conf_df$pred_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Keel-billed Toucan"
  )
)

conf_df$ref_common_name <- factor(
  conf_df$ref_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Keel-billed Toucan"
  )
)

ggplot(conf_df, aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
  geom_tile() +
  theme_bw() +
  coord_equal() +
  scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
  labs(x = "Reference species", y = "Predicted species") +
  geom_text(aes(label = round(Proportion, 2)), color = "black") +
  theme_classic() +
  theme(
    axis.text.x = element_text(
      color = "black",
      size = 11,
      angle = 30,
      hjust = 1
    )
  )

Code
# read data
sympatric_focal <- imp_raven(
  path = "./data/processed/birdnet_classification_outputs/mimic_bellbird/sympatric_model/",
  all.data = TRUE,
  warbler.format = TRUE,
  pb = FALSE
)

# filter out low confidence calls
sympatric_focal$true.positive <- sapply(seq_len(nrow(sympatric_focal)), function(x){
    if (sympatric_focal$`Common Name`[x] == "nocall")
        return(FALSE)
        
    sympatric_focal$Confidence[x] >= mod_eval_symp$Optimal.Threshold[mod_eval_symp$Class == sympatric_focal$`Common Name`[x]]
    })



sympatric_focal$Reference <- "Focal\nbellbird"

sympatric_focal_unfiltered <- sympatric_focal

sympatric_focal$`Common Name`[!sympatric_focal$true.positive] <- "nocall"

# rename column
sympatric_focal$predicted_sp <- sympatric_focal$`Common Name`

sympatric_focal$`Common Name` <- NULL

# get other bellbirds from previous data
sympatric_others <- sympatric_class_test[sympatric_class_test$vocalizing_sp == "Procnias tricarunculatus", ]

sympatric_others$vocalizing_sp <- NULL

sympatric_others$Reference <- "Other\nbellbirds"

sympatric <- rbind(
  sympatric_focal,
  sympatric_others
)

prop_sympatric <- table(sympatric$predicted_sp, sympatric$Reference)

prop_sympatric <- data.frame(prop_sympatric)
names(prop_sympatric) <- c("Prediction", "Vocalizing", "Freq")

# add Psarocolius
# prop_sympatric2 <- prop_sympatric[3:4, ]
# prop_sympatric2$Prediction <- "Psarocolius montezuma"
# prop_sympatric2$Freq <- 0

# prop_sympatric <- rbind(prop_sympatric, prop_sympatric2)

prop_sympatric$total <- sapply(prop_sympatric$Vocalizing, function(x) sum(prop_sympatric$Freq[prop_sympatric$Vocalizing == x]))

prop_sympatric$Proportion <- prop_sympatric$Freq / prop_sympatric$total

prop_sympatric$common_name <- common_names[as.character(
  prop_sympatric$Prediction
)]

# order levels
prop_sympatric$common_name <- factor(
  prop_sympatric$common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Collared Aracari",
    "Keel-billed Toucan",
    "Montezuma Oropendola",
    "Unassigned"
  )
)
Code
# prepare data

prop_sympatric$panel <- as.character(prop_sympatric$common_name)

prop_sympatric$panel[
  prop_sympatric$common_name %in%
    c("Three-wattled Bellbird", "Unassigned")
] <- "Three-wattled Bellbird"


prop_sympatric$Vocalizing <- factor(
  prop_sympatric$Vocalizing,
  levels = c("Other\nbellbirds", "Focal\nbellbird")
)

# User settings

base_size <- 14
percent_text_size <- 4

loop_df <- data.frame(
  panel = c(
    "Three-wattled Bellbird",
    "Collared Aracari",
    "Montezuma Oropendola",
    "Keel-billed Toucan"
  ),
  label = c(
    "A",
    "B",
    "C",
    "D"
  ),
  y_max = c(
    1.4,
    0.1,
    0.022,
    0.27
  )
)


prop_sympatric$prop_label <- paste(
  round(prop_sympatric$Proportion * 100, 1),
  "%\n n = ",
 prop_sympatric$Freq
)


prop_sympatric$sp_label <- ""

prop_sympatric$sp_label[prop_sympatric$panel == "Three-wattled Bellbird"] <- as.character(prop_sympatric$common_name[prop_sympatric$panel == "Three-wattled Bellbird"])

prop_sympatric$sp_label <- gsub("Three-wattled Bellbird", "Three\nwattled", prop_sympatric$sp_label)

prop_sympatric$sp_label <- as.character(prop_sympatric$sp_label)

prop_sympatric$sp_label_y <- prop_sympatric$Proportion + 0.18


# panel function

make_panel <- function(panel_name, label, y_max){

  dat_sub <- subset(
    prop_sympatric, subset = 
        prop_sympatric$panel == panel_name
  )

  y_breaks <- c(
    0,
    y_max / 2,
    y_max
  )

  if(panel_name == "Montezuma Oropendola"){

    y_scale <- scale_y_continuous(
      limits = c(0, y_max),
      breaks = y_breaks,
      labels = function(x) sprintf("%.1f%%", x * 100)
    )

  } else {

    y_scale <- scale_y_continuous(
      limits = c(0, y_max),
      breaks = y_breaks,
      labels = scales::percent
    )

  }

    if(panel_name == "Three-wattled Bellbird"){

  fill_scale <- scale_fill_manual(
    name = "",
    values = c(
      "Unassigned" = "#DEF5E5B3",
      "Three-wattled Bellbird" = "#28192FB3"      # bellbird color
    ),
    breaks = "Sympatric"
  ) 
} else {

  fill_scale <- scale_fill_manual(
    name = "",
    values = c(bar_cols[3]),
  )
}
  
  ggplot(
    dat_sub,
    aes(
      x = Vocalizing,
      y = Proportion,
      fill = common_name
    )
  )  +
    geom_text(
      aes(
        group = interaction(Vocalizing, common_name),
        label = sp_label,
        y = sp_label_y
      ),
      position = position_dodge(width = 0.9),
      angle = 90,
      hjust = 0,
      vjust = 0.5,
      size = species_text_size
    ) +
    geom_bar(
      stat = "identity",
      position = position_dodge(width = 0.9)
    ) +
      
    geom_text(
          lineheight = 0.7,
      aes(
        label = dat_sub$prop_label
      ),
      position = position_dodge(width = 0.9),
      vjust = -0.1,
      size = percent_text_size
    ) +

    facet_wrap(
      ~ panel,
      scales = "free_x"
    ) +

    y_scale +

    fill_scale +

    labs(
      title = label,
      x = NULL
    ) +

    theme_classic(base_size = base_size) +

    theme(

      legend.position = "none",

      legend.background = element_rect(
        fill = "white",
        color = NA
      ),

      axis.title.y = element_blank(),

      strip.background = element_rect(
        fill = "white",
        color = "black"
      ),

      strip.text = element_text(
        size = base_size
      ),

      axis.text.y = element_text(
        size = base_size
      ),

      legend.text = element_text(
        size = base_size
      ),

      plot.title = element_text(
        face = "bold",
        hjust = 0
      ),

      axis.line = element_line(
        linewidth = 0.8
      ),

      axis.ticks = element_line(
        linewidth = 0.8
      )
    )
}


# Create panels

plot_list <- lapply(
  seq_len(nrow(loop_df)),
  function(i){

    make_panel(
      panel_name = loop_df$panel[i],
      label = loop_df$label[i],
      y_max = loop_df$y_max[i]
    )

  }
)


# Combine panels
combined_plot <- plot_grid(
  plotlist = plot_list,
  ncol = 2,
  align = "hv"
)

y_label <- ggdraw() +
  draw_label(
    "Percentage of vocalizations predicted as",
    angle = 90,
    size = base_size
  )


# Final plot
final_plot <- plot_grid(
  y_label,
  combined_plot,
  ncol = 2,
  rel_widths = c(0.06, 1)
)

if (!isTRUE(getOption('knitr.in.progress'))) {
ggsave(
  filename = "./scripts/focal_vs_other_bellbirds_sympatric_prediction.png",
  plot = final_plot,
  width = 10,
  height = 8,
  dpi = 300,
  device = grDevices::png
  ) 
    
final_plot    
} 

Predictions from the sympatric species only model. Bars show the proportion of vocalizations from focal bellbirds and other bellbirds assigned by a bird-sound classifier to model species. (A) Vocalizations classified as three-wattled bellbirds (Procnias tricarunculatus) or left unassigned. (B–D) Vocalizations classified as aracaris, oropendolas, and toucans.

2.1.1 Statistical analysis

Code
sympatric$prediction_type <- ifelse(
  grepl("rocnias", sympatric$predicted_sp),
  0,
  1
)

sympatric$vocalizing_type <- ifelse(
  grepl("ocal", sympatric$Reference),
  "focal",
  "others"
)

# order levels
sympatric$vocalizing_type <- factor(
  sympatric$vocalizing_type,
  levels = c("others", "focal")
)

cores <- 4
chains <- 4
iter <- 4000

priors <- c(
  prior(normal(-3, 1), class = "Intercept"),
  prior(normal(0, 2), class = "b")
)

only_prior_fit2  <- brm(
  prediction_type ~ vocalizing_type + predicted_sp,
  data = sympatric[sympatric$predicted_sp != "nocall",],
  family = bernoulli(),
  prior = priors,
sample_prior = "only",
  chains = chains,
  iter = iter,
  cores = cores,
  file = "./data/processed/mimicry_simpatric_only_model_only_priors.RDS",
  file_refit = "on_change"
)


mod2 <- brm(
  prediction_type ~ vocalizing_type,
  data = sympatric[sympatric$predicted_sp != "nocall",],
  family = bernoulli(),
  prior = priors,
  chains = chains,
  iter = iter,
  cores = cores,
  control = list(
  adapt_delta = 0.999,
  max_treedepth = 15
),
  file = "./data/processed/mimicry_simpatric_only_model.RDS",
  file_refit = "on_change"
)

2.1.1.1 Prior check

Priors

Code
prior_summary(mod2)
prior class coef group resp dpar nlpar lb ub tag source
normal(0, 2) b user
b vocalizing_typefocal default
normal(-3, 1) Intercept user

Prior predictive checks

Code
custom_ppc(only_prior_fit2, group = "vocalizing_type", ndraws = 100)

2.1.2 Fit summary

Code
fill_color <- mako(10)[8]

extended_summary(
  mod2,
  n.posterior = 1000,
  fill = fill_color,
  trace.palette = mako,
  remove.intercepts = FALSE,
  highlight = TRUE,
  print.name = FALSE
)
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 b-normal(0, 2) Intercept-normal(-3, 1) prediction_type ~ vocalizing_type 4000 4 1 2000 0 (0%) 0 1528.37 1785.78 2052028757
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_Intercept -5.140 -7.251 -3.595 1.002 1528.37 1785.78
b_vocalizing_typefocal 5.417 3.869 7.528 1.001 1551.31 1853.54

Posterior predictive check

Code
custom_ppc(mod1, group = "vocalizing_type", ndraws = 100)

Code
# Odds ratio: focal vs other bellbirds
post2 <- as_draws_df(mod2)

# Odds ratio: focal vs other bellbirds
focal_vs_others <- exp(post2$b_vocalizing_typefocal)

# Predicted probability of sympatric classification for focal birds
p_focal <- posterior_epred(
  mod2,
  newdata = data.frame(vocalizing_type = "focal")
)

p_focal <- as.numeric(p_focal)

# Predicted probability of sympatric classification for other bellbirds
p_others <- posterior_epred(
  mod2,
  newdata = data.frame(vocalizing_type = "others")
)

p_others <- as.numeric(p_others)

2.1.2.1 Summary

  • Focal bellbird vocalizations had 203.89 times greater odds [95% UI: 47.92–1859.37] of being classified as a sympatric model species compared with vocalizations from other bellbirds (predicted probability of sympatric classification for focal birds = 0.57; for other bellbirds = 0.01).

3 Mimicry through time

Code
mimic_data <- est[est$source == "mimic_bellbird", ]

rec_info <- read.table("./data/raw/Recording_dates_.csv", head = TRUE, sep = ";")

rec_info$date <- gsub("\\/", "-", rec_info$date)
rec_info$date <- gsub("out", "ago", rec_info$date)
rec_info$date <- gsub("set", "sep", rec_info$date)

rec_info$org.date <- rec_info$date
rec_info$date <- as.Date(rec_info$date, format = "%d-%b-%y")


mimic_data$orig.sound.files <- attr(mimic_data, "check.res")$orig.sound.files


mimic_data$date <- sapply(mimic_data$orig.sound.files, function(x) as.character(rec_info$date[rec_info$sound.files ==
    x][1]))

mimic_data$date <- as.Date(mimic_data$date)

sympatric_focal_unfiltered$fixed.sound.files <- gsub("Nueva-grabaci\xf3n", "Nueva-grabación", sympatric_focal_unfiltered$sound.files, useBytes = TRUE)

sympatric_focal_unfiltered$fixed.sound.files <- basename(gsub("\\\\", "/", sympatric_focal_unfiltered$fixed.sound.files, useBytes = TRUE))

mimic_data$sound.files <- gsub("Nueva-grabaci\xf3n", "Nueva-grabación", mimic_data$sound.files, useBytes = TRUE)


mimic_data$sound.files <- paste0(mimic_data$sound.files, ".wav")

mimic_data$pred_mimic <- sapply(mimic_data$sound.files, function(x) {
  sympatric_focal_unfiltered$`Common Name`[sympatric_focal_unfiltered$fixed.sound.files == x][1]
})

# mimic_data <- mimic_data[mimic_data$pred_mimic != "babbling",]

# mimic_data$pred_mimic <- ifelse(is.na(mimic_data$pred_mimic), "nocall", mimic_data$pred_mimic)

agg_pred <- aggregate(orig.sound.files ~ date + pred_mimic, mimic_data, length)


out <- lapply(unique(agg_pred$date), function(x) {

    X <- agg_pred[agg_pred$date == x, ]
    X$prop <- X$orig.sound.files/sum(X$orig.sound.files)

    if (length(unique(X$pred_mimic)) < length(unique(mimic_data$pred_mimic))) {

        new_df <- data.frame(date = X$date[1], pred_mimic = setdiff(c("Procnias tricarunculatus",
            "Pteroglossus torquatus", "Ramphastos sulfuratus", "Psarocolius montezuma", "nocall"), X$pred_mimic),
            orig.sound.files = 0, prop = 0)
  
        X <- rbind(new_df, X)
    }
    
      X$total <- sum(X$orig.sound.files)
    return(X)
})

prop_pred <- do.call(rbind, out)

prop_pred$date.fact <- as.factor(as.character(prop_pred$date))

count_date <- aggregate(orig.sound.files ~ date, prop_pred, sum)

prop_pred$pred_common_name <- common_names[as.character(prop_pred$pred_mimic)]

dat <- prop_pred[prop_pred$pred_mimic != "Psarocolius montezuma",]

dat$julian_day <- as.numeric(format(as.Date(dat$date), "%j"))


dat$pred_common_name <- common_names[as.character(dat$pred_mimic)]


dat$pred_common_name <- factor(dat$pred_common_name, levels = c( "Three-wattled Bellbird", "Collared Aracari",     "Keel-billed Toucan", "Unassigned"))

3.1 Statistical analysis

We modeled the probability of producing original sounds as a function of Julian day while allowing both intercepts and temporal slopes to vary among mimic categories (pred_mimic). Rather than fitting a full fixed interaction between date and mimic category, we used a hierarchical varying-slope structure to account for heterogeneity among mimic groups while enabling partial pooling across categories. This approach reduces overparameterization and yields more stable estimates, particularly for mimic groups with limited sample sizes, while still allowing different groups to exhibit distinct temporal trajectories.

Code
# weakly informative priors
priors <- c(
# Weakly informative logistic intercept
prior(normal(0, 2.5), class = "Intercept"),
# Temporal slope
prior(normal(0, 1), class = "b"),
# Random effect standard deviations
prior(exponential(1), class = "sd")
)

mod2 <- brm(
  orig.sound.files | trials(total) ~ 
    scale(julian_day) +
    (1 + scale(julian_day) | pred_common_name),
  family = binomial,
  data = dat,
  prior = priors,
  chains = 4,
  cores = 4,
  iter = 4000,
  control = list(adapt_delta = 0.95), file_refit = "on_change",
  file = "data/processed/brms_model_prop_pred_mimicry.rds"
)

# mod2 <- add_criterion(
#       mod2,
#       criterion = "loo")


only_prior_mod2 <- brm(
  orig.sound.files | trials(total) ~ 
    scale(julian_day) +
    (1 + scale(julian_day) | pred_common_name),
  family = binomial,
  data = dat,
  prior = priors,
  sample_prior = "only",
  chains = 4,
  cores = 4,
  iter = 4000,
  control = list(adapt_delta = 0.95), file_refit = "on_change",
  file = "data/processed/brms_model_prop_pred_mimicry_only_prior.rds"
)

3.1.1 Prior check

Priors

Code
prior_summary(mod2)
prior class coef group resp dpar nlpar lb ub tag source
normal(0, 1) b user
b scalejulian_day default
normal(0, 2.5) Intercept user
lkj_corr_cholesky(1) L default
L pred_common_name default
exponential(1) sd 0 user
sd pred_common_name default
sd Intercept pred_common_name default
sd scalejulian_day pred_common_name default

Prior predictive checks

Code
custom_ppc(only_prior_mod2, group = "vocalizing_type", ndraws = 100)

3.1.2 Fit summary

Code
fill_color <- mako(10)[8]

extended_summary(
  mod2,
  n.posterior = 1000,
  fill = fill_color,
  trace.palette = mako,
  remove.intercepts = FALSE,
  highlight = TRUE,
  print.name = FALSE
)
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 b-normal(0, 1) Intercept-normal(0, 2.5) L-lkj_corr_cholesky(1) sd-exponential(1) orig.sound.files | trials(total) ~ scale(julian_day) + (1 + scale(julian_day) | pred_common_name) 4000 4 1 2000 2 (0.00025%) 0 2720.74 3719.75 263265829
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_Intercept -1.704 -4.106 0.708 1 2720.74 3719.75
b_scalejulian_day -0.140 -1.097 0.816 1 3674.43 4492.18

Posterior predictive check

Code
custom_ppc(mod2, group = "vocalizing_type", ndraws = 100)

Code
# Extract posterior draws as base R data.frame
post <- as.data.frame(mod2)

# Get species levels
species_levels <- unique(mod2$data$pred_common_name)

# Global slope
beta_global <- post[, "b_scalejulian_day"]
# Find all random slope columns automatically
rand_cols <- grep("^r_pred_common_name\\[.*,scalejulian_day\\]$",
names(post),
value = TRUE)
# Extract species names from column names
species_levels <- sub("r_pred_common_name\\[(.*),scalejulian_day\\]", "\\1", rand_cols)


# Storage matrix
results <- matrix(NA, nrow = length(species_levels), ncol = 3)
colnames(results) <- c("mean_slope",
                       "l95",
                       "u95")

rownames(results) <- species_levels


# Loop through species
for (i in seq_along(rand_cols)) {

  slope_draws <- beta_global + post[, rand_cols[i]]

  results[i, "mean_slope"]    <- mean(slope_draws)
  results[i, "l95"]           <- quantile(slope_draws, 0.025)
  results[i, "u95"]           <- quantile(slope_draws, 0.975)
}

# Print results
print(round(results, 4), row.names = TRUE)
mean_slope l95 u95
Three-wattled.Bellbird 0.7808 0.6627 0.9048
Collared.Aracari -0.1110 -0.2725 0.0511
Keel-billed.Toucan -0.7156 -0.8308 -0.6031
Unassigned -0.8273 -3.2819 1.3958
Code
# Create prediction grid
newdata <- expand.grid(
  julian_day = seq(min(dat$julian_day),
                   max(dat$julian_day),
                   length.out = 100),
  pred_common_name = unique(dat$pred_common_name)
)

# IMPORTANT: match scaling used in model
newdata$scalejulian_day <- scale(dat$julian_day)[1]  # dummy to get attributes
newdata$scalejulian_day <- (newdata$julian_day - mean(dat$julian_day)) /
                           sd(dat$julian_day)

newdata$total <- round(mean(dat$total), 0)  # use mean total for predictions

# Get posterior expected probabilities
epred <- posterior_epred(
  mod2,
  newdata = newdata,
  re_formula = NULL
)

# Posterior mean prediction
newdata$fit <- colMeans(epred)

# Convert julian day back to date for plotting
origin_year <- format(as.Date(dat$date[1]), "%Y")
newdata$date <- as.Date(newdata$julian_day - 1,
                        origin = paste0(origin_year, "-01-01"))

newdata$fit_prop <- newdata$fit / 100

# include only those for which CI did not include 0
results_df <- as.data.frame(results)
sig_spp <- rownames(results_df)[results_df$l95 * results_df$u95 > 0]

newdata <- newdata[newdata$pred_common_name %in% c("Three-wattled Bellbird", "Collared Aracari",     "Keel-billed Toucan", "Unassigned"), ]


# Plot
ggplot(
  dat[dat$prop > 0, ],
  aes(
    x = as.Date(date),
    y = prop,
    fill = pred_common_name,
    col = pred_common_name,
    size = orig.sound.files
  )
) +
  geom_point(pch = 21) +
  geom_line(
    data = newdata,
    aes(x = date, y = fit_prop, color = pred_common_name),
    size = 1.2
  ) +
  scale_fill_viridis_d(alpha = 0.6, end = 0.8) +
  scale_color_viridis_d(alpha = 0.8, end = 0.8) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) +
  labs(
    x = "Date",
    y = "Percentage of vocalizations\npredicted as",
    fill = "Predicted species",
    color = "Predicted species",
    size = "Number of recorded\nvocalizations"
  ) +
 scale_y_continuous(
    labels = scales::percent)

3.1.3 Summary

  • The proportion of vocalizations of its own species increased true time while the proportion for Killed-billed Toucan and unassigned calls decreased
  • Proportions for Collared Araraci did not show a definitive pattern
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.5.2 (2025-10-31)
 os       Ubuntu 22.04.4 LTS
 system   x86_64, linux-gnu
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Costa_Rica
 date     2026-06-11
 pandoc   3.6.3 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
 quarto   1.8.25 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/quarto

─ Packages ───────────────────────────────────────────────────────────────────
 package        * version    date (UTC) lib source
 abind            1.4-8      2024-09-12 [1] CRAN (R 4.5.2)
 ape              5.8-1      2024-12-16 [1] CRAN (R 4.5.2)
 arrayhelpers     1.1-0      2020-02-04 [1] CRAN (R 4.5.2)
 backports        1.5.1      2026-04-03 [1] CRAN (R 4.5.2)
 bayesplot        1.15.0     2025-12-12 [1] CRAN (R 4.5.2)
 bitops           1.0-9      2024-10-03 [1] CRAN (R 4.5.2)
 bridgesampling   1.2-1      2025-11-19 [1] CRAN (R 4.5.2)
 brio             1.1.5      2024-04-24 [1] CRAN (R 4.5.2)
 brms           * 2.23.0     2025-09-09 [1] CRAN (R 4.5.2)
 brmsish        * 1.0.0      2026-03-03 [1] Github (maRce10/brmsish@81ab826)
 Brobdingnag      1.2-9      2022-10-19 [1] CRAN (R 4.5.2)
 cachem           1.1.0      2024-05-16 [1] CRAN (R 4.5.2)
 caret          * 7.0-1      2024-12-10 [1] CRAN (R 4.5.2)
 checkmate        2.3.4      2026-02-03 [1] CRAN (R 4.5.2)
 class            7.3-23     2025-01-01 [1] CRAN (R 4.5.2)
 cli              3.6.6      2026-04-09 [1] CRAN (R 4.5.2)
 coda             0.19-4.1   2024-01-31 [1] CRAN (R 4.5.2)
 codetools        0.2-20     2024-03-31 [1] CRAN (R 4.5.2)
 cowplot        * 1.2.0      2025-07-07 [1] CRAN (R 4.5.2)
 crayon           1.5.3      2024-06-20 [1] CRAN (R 4.5.2)
 curl             7.0.0      2025-08-19 [1] CRAN (R 4.5.2)
 data.table       1.18.2.1   2026-01-27 [1] CRAN (R 4.5.2)
 devtools         2.4.6      2025-10-03 [1] CRAN (R 4.5.2)
 digest           0.6.39     2025-11-19 [1] CRAN (R 4.5.2)
 distributional   0.6.0      2026-01-14 [1] CRAN (R 4.5.2)
 doParallel     * 1.0.17     2022-02-07 [1] CRAN (R 4.5.2)
 dplyr            1.2.1      2026-04-03 [1] CRAN (R 4.5.2)
 dtw              1.23-1     2022-09-19 [1] CRAN (R 4.5.2)
 e1071            1.7-17     2025-12-18 [1] CRAN (R 4.5.2)
 ellipsis         0.3.2      2021-04-29 [3] CRAN (R 4.1.1)
 evaluate         1.0.5      2025-08-27 [1] CRAN (R 4.5.2)
 farver           2.1.2      2024-05-13 [1] CRAN (R 4.5.2)
 fastmap          1.2.0      2024-05-15 [1] CRAN (R 4.5.2)
 fftw             1.0-9      2024-09-20 [1] CRAN (R 4.5.2)
 foreach        * 1.5.2      2022-02-02 [3] CRAN (R 4.1.2)
 fs               2.0.1      2026-03-24 [1] CRAN (R 4.5.2)
 future           1.70.0     2026-03-14 [1] CRAN (R 4.5.2)
 future.apply     1.20.2     2026-02-20 [1] CRAN (R 4.5.2)
 generics         0.1.4      2025-05-09 [1] CRAN (R 4.5.2)
 ggdist           3.3.3      2025-04-23 [1] CRAN (R 4.5.2)
 ggplot2        * 4.0.2      2026-02-03 [1] CRAN (R 4.5.2)
 globals          0.19.1     2026-03-13 [1] CRAN (R 4.5.2)
 glue             1.8.0      2024-09-30 [1] CRAN (R 4.5.2)
 gower            1.0.2      2024-12-17 [1] CRAN (R 4.5.2)
 gridExtra        2.3        2017-09-09 [3] CRAN (R 4.0.1)
 gtable           0.3.6      2024-10-25 [1] CRAN (R 4.5.2)
 hardhat          1.4.2      2025-08-20 [1] CRAN (R 4.5.2)
 htmltools        0.5.9      2025-12-04 [1] CRAN (R 4.5.2)
 htmlwidgets      1.6.4      2023-12-06 [1] CRAN (R 4.5.2)
 httr             1.4.8      2026-02-13 [1] CRAN (R 4.5.2)
 inline           0.3.21     2025-01-09 [1] CRAN (R 4.5.2)
 ipred            0.9-15     2024-07-18 [1] CRAN (R 4.5.2)
 iterators      * 1.0.14     2022-02-05 [3] CRAN (R 4.1.2)
 jsonlite         2.0.0      2025-03-27 [1] CRAN (R 4.5.2)
 kableExtra     * 1.4.0      2024-01-24 [1] CRAN (R 4.5.2)
 knitr          * 1.51       2025-12-20 [1] CRAN (R 4.5.2)
 labeling         0.4.3      2023-08-29 [1] CRAN (R 4.5.2)
 lattice        * 0.22-9     2026-02-09 [1] CRAN (R 4.5.2)
 lava             1.9.0      2026-04-05 [1] CRAN (R 4.5.2)
 lifecycle        1.0.5      2026-01-08 [1] CRAN (R 4.5.2)
 listenv          0.10.1     2026-03-10 [1] CRAN (R 4.5.2)
 loo              2.9.0      2025-12-23 [1] CRAN (R 4.5.2)
 lubridate        1.9.5      2026-02-04 [1] CRAN (R 4.5.2)
 magrittr         2.0.5      2026-04-04 [1] CRAN (R 4.5.2)
 MASS             7.3-65     2025-02-28 [1] CRAN (R 4.5.2)
 Matrix           1.7-4      2025-08-28 [1] CRAN (R 4.5.2)
 matrixStats      1.5.0      2025-01-07 [1] CRAN (R 4.5.2)
 memoise          2.0.1      2021-11-26 [3] CRAN (R 4.1.2)
 ModelMetrics     1.2.2.2    2020-03-17 [3] CRAN (R 4.0.1)
 mvtnorm          1.3-3      2025-01-10 [1] CRAN (R 4.5.2)
 NatureSounds     1.0.5      2025-01-17 [1] CRAN (R 4.5.2)
 nlme             3.1-168    2025-03-31 [1] CRAN (R 4.5.2)
 nnet             7.3-20     2025-01-01 [1] CRAN (R 4.5.2)
 otel             0.2.0      2025-08-29 [1] CRAN (R 4.5.2)
 packrat          0.9.3      2025-06-16 [1] CRAN (R 4.5.2)
 parallelly       1.46.1     2026-01-08 [1] CRAN (R 4.5.2)
 pbapply          1.7-4      2025-07-20 [1] CRAN (R 4.5.2)
 pillar           1.11.1     2025-09-17 [1] CRAN (R 4.5.2)
 pkgbuild         1.4.8      2025-05-26 [1] CRAN (R 4.5.2)
 pkgconfig        2.0.3      2019-09-22 [3] CRAN (R 4.0.1)
 pkgload          1.5.1      2026-04-01 [1] CRAN (R 4.5.2)
 plyr             1.8.9      2023-10-02 [1] CRAN (R 4.5.2)
 posterior        1.6.1      2025-02-27 [1] CRAN (R 4.5.2)
 pROC             1.19.0.1   2025-07-31 [1] CRAN (R 4.5.2)
 prodlim          2026.03.11 2026-03-11 [1] CRAN (R 4.5.2)
 proxy            0.4-29     2025-12-29 [1] CRAN (R 4.5.2)
 purrr            1.2.2      2026-04-10 [1] CRAN (R 4.5.2)
 QuickJSR         1.9.0      2026-01-25 [1] CRAN (R 4.5.2)
 R6               2.6.1      2025-02-15 [1] CRAN (R 4.5.2)
 RColorBrewer     1.1-3      2022-04-03 [1] CRAN (R 4.5.2)
 Rcpp           * 1.1.1      2026-01-10 [1] CRAN (R 4.5.2)
 RcppParallel     5.1.11-2   2026-03-05 [1] CRAN (R 4.5.2)
 RCurl            1.98-1.17  2025-03-22 [1] CRAN (R 4.5.2)
 recipes          1.3.1      2025-05-21 [1] CRAN (R 4.5.2)
 remotes          2.5.0      2024-03-17 [1] CRAN (R 4.5.2)
 reshape2         1.4.5      2025-11-12 [1] CRAN (R 4.5.2)
 rjson            0.2.23     2024-09-16 [1] CRAN (R 4.5.2)
 rlang            1.2.0      2026-04-06 [1] CRAN (R 4.5.2)
 rmarkdown        2.31       2026-03-26 [1] CRAN (R 4.5.2)
 rpart            4.1.24     2025-01-07 [1] CRAN (R 4.5.2)
 Rraven         * 1.0.16     2025-10-23 [1] CRAN (R 4.5.2)
 rstan            2.32.7     2025-03-10 [1] CRAN (R 4.5.2)
 rstantools       2.6.0      2026-01-10 [1] CRAN (R 4.5.2)
 rstudioapi       0.18.0     2026-01-16 [1] CRAN (R 4.5.2)
 Rtsne          * 0.17       2023-12-07 [1] CRAN (R 4.5.2)
 S7               0.2.1      2025-11-14 [1] CRAN (R 4.5.2)
 scales           1.4.0      2025-04-24 [1] CRAN (R 4.5.2)
 seewave          2.2.4      2025-08-19 [1] CRAN (R 4.5.2)
 sessioninfo      1.2.3      2025-02-05 [1] CRAN (R 4.5.2)
 signal           1.8-1      2024-06-26 [1] CRAN (R 4.5.2)
 sketchy          1.0.7      2026-03-03 [1] CRANs (R 4.5.2)
 StanHeaders      2.32.10    2024-07-15 [1] CRAN (R 4.5.2)
 stringi          1.8.7      2025-03-27 [1] CRAN (R 4.5.2)
 stringr          1.6.0      2025-11-04 [1] CRAN (R 4.5.2)
 survival         3.8-6      2026-01-16 [1] CRAN (R 4.5.2)
 svglite          2.2.2      2025-10-21 [1] CRAN (R 4.5.2)
 svUnit           1.0.8      2025-08-26 [1] CRAN (R 4.5.2)
 systemfonts      1.3.1      2025-10-01 [1] CRAN (R 4.5.2)
 tensorA          0.36.2.1   2023-12-13 [1] CRAN (R 4.5.2)
 testthat         3.3.2      2026-01-11 [1] CRAN (R 4.5.2)
 textshaping      1.0.4      2025-10-10 [1] CRAN (R 4.5.2)
 tibble           3.3.1      2026-01-11 [1] CRAN (R 4.5.2)
 tidybayes        3.0.7      2024-09-15 [1] CRAN (R 4.5.2)
 tidyr            1.3.2      2025-12-19 [1] CRAN (R 4.5.2)
 tidyselect       1.2.1      2024-03-11 [1] CRAN (R 4.5.2)
 timechange       0.4.0      2026-01-29 [1] CRAN (R 4.5.2)
 timeDate         4052.112   2026-01-28 [1] CRAN (R 4.5.2)
 tuneR            1.4.7      2024-04-17 [1] CRAN (R 4.5.2)
 usethis          3.2.1      2025-09-06 [1] CRAN (R 4.5.2)
 vctrs            0.7.3      2026-04-11 [1] CRAN (R 4.5.2)
 viridis        * 0.6.5      2024-01-29 [1] CRAN (R 4.5.2)
 viridisLite    * 0.4.3      2026-02-04 [1] CRAN (R 4.5.2)
 warbleR          1.1.37     2025-10-22 [1] CRAN (R 4.5.2)
 withr            3.0.2      2024-10-28 [1] CRAN (R 4.5.2)
 xaringanExtra    0.8.0      2024-05-19 [1] CRAN (R 4.5.2)
 xfun             0.57       2026-03-20 [1] CRAN (R 4.5.2)
 xml2             1.5.2      2026-01-17 [1] CRAN (R 4.5.2)
 yaml             2.3.12     2025-12-10 [1] CRAN (R 4.5.2)

 [1] /home/m/R/x86_64-pc-linux-gnu-library/4.5
 [2] /usr/local/lib/R/site-library
 [3] /usr/lib/R/site-library
 [4] /usr/lib/R/library
 * ── Packages attached to the search path.

──────────────────────────────────────────────────────────────────────────────