Statistical analysis

Bellbird mimicry

Author

Published

June 11, 2026

Code

# options to customize chunk outputs
knitr::opts_chunk$set(
  message = FALSE
 )

Purpose

Evaluate similarity of vocalizations of a mimic bellbird and its model species using birdNET embeddings
- Are focal belbird vocalizations more similar to sympatric than to allopatric species?
- Are mimic vocalizations (resembling sympatric species) more common in the focal bellbird than in other bellbirds?

Analysis flowchart

flowchart LR
  A[Read birdNET output] --> B(Format data) 
  B --> C(Graphs)
  C --> D(Statistical analysis)
  D --> E(Model summary) 

style A fill:#382A5433
style B fill:#395D9C33
style C fill:#3497A933
style D fill:#60CEAC33

Load packages and custom functions

Code

# knitr is require for creating html/pdf/word reports
# formatR is used for soft-wrapping code

# install knitr package if not installed
if (!requireNamespace("sketchy", quietly = TRUE)) {
  install.packages("sketchy")
}


packages <- c(
  "knitr",
  "Rtsne",
  "caret",
  "ggplot2",
  "viridis",
  "doParallel",
  "brms",
  "brmsish",
  "cowplot",
  "Rraven",
  "kableExtra"
)

# install/ load packages
sketchy::load_packages(packages = packages)


options(knitr.kable.NA = '')

print <- function(x, row.names = FALSE) {
    kb <- kable(x, row.names = row.names, digits = 4, "html")
    kb <- kable_styling(kb,
                        bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    scroll_box(kb, width = "100%")
}

common_names <- c(
  "Procnias tricarunculatus" = "Three-wattled Bellbird",
  "Psarocolius guatimozinus" = "Chestnut-headed Oropendola",
  "Psarocolius montezuma" = "Montezuma Oropendola",
  "Pteroglossus erythropygius" = "Pale-mandibled Aracari",
  "Pteroglossus torquatus" = "Collared Aracari",
  "Ramphastos brevis" = "Choco Toucan",
  "Ramphastos sulfuratus" = "Keel-billed Toucan",
  "nocall" = "Unassigned"
)

# to create several posterior predictive check plots out of a brms fit
custom_ppc <- function(fit, group = NULL, ndraws = 30) {
  by_group  <- if (!is.null(group)) {
    TRUE
  } else
    FALSE
  
  if (by_group)
    by_group  <-  if (any(names(fit$data) == group)) {
      TRUE
    } else
      FALSE
  
  if (by_group)
    by_group <-
      if (is.character(fit$data[, group]) |
          is.factor(fit$data[, group])) {
        TRUE
      } else
        FALSE
  
  
  if (by_group) {
    ppc_dens <- pp_check(fit,
                         ndraws = ndraws,
                         type = 'dens_overlay_grouped',
                         group = group)
    
    pp_mean <- pp_check(
      fit,
      type = "stat_grouped",
      stat = "mean",
      group = group,
      ndraws = ndraws
    )  + theme_classic()
    
  } else {
    ppc_dens <- pp_check(fit, ndraws = ndraws, type = 'dens_overlay')
    
    pp_mean <- pp_check(fit,
                        type = "stat",
                        stat = "mean",
                        ndraws = ndraws) + theme_classic()
    
  }
  
  pp_plot_list <-
    list(ppc_dens, pp_mean)
  
  pp_plot_list <-
    lapply(pp_plot_list, function(x)
      x   + scale_fill_viridis_d(
        begin = 0.3,
        end = 0.8,
        alpha = 0.5,
        option = "mako"
      ) + theme_classic())
  
  
  ppc_plot <- plot_grid(plotlist = pp_plot_list, ncol = 2)
 
   ppc_plot
}

1 Sympatric and allopatric species

1.1 BirdNET model

Code

mod_eval_symp_allo <- read.csv("./data/processed/birdnet_models/sympatric_and_allopatric/sympatric_and_allopatric_evaluation.csv")

names(mod_eval_symp_allo) <- gsub("..", " ", names(mod_eval_symp_allo), fixed = TRUE)

print(mod_eval_symp_allo)

Class	Precision 0.5.	Recall 0.5.	F1.Score 0.5.	Precision opt.	Recall opt.	F1.Score opt.	AUPRC	AUROC	Optimal.Threshold	True.Positives	False.Positives	True.Negatives	False.Negatives	Samples	Percentage
OVERALL (Macro-avg)	0.8908	0.9627	0.9232	0.9180	0.9504	0.9312	0.9630	0.9968
Procnias tricarunculatus	0.9712	0.9854	0.9783	1.0000	0.9708	0.9852	0.9990	0.9998	0.80	133	0	884	4	137	13.42
Psarocolius guatimozinus	0.8909	0.9608	0.9245	0.9423	0.9608	0.9515	0.9867	0.9992	0.55	49	3	967	2	51	5.00
Psarocolius montezuma	0.9014	0.9846	0.9412	0.9538	0.9538	0.9538	0.9873	0.9991	0.75	62	3	953	3	65	6.37
Pteroglossus erythropygius	0.8943	0.9486	0.9206	0.9336	0.9206	0.9271	0.9844	0.9959	0.70	197	14	793	17	214	20.96
Pteroglossus torquatus	0.6216	0.8679	0.7244	0.6216	0.8679	0.7244	0.7844	0.9842	0.50	46	28	940	7	53	5.19
Ramphastos brevis	0.9840	0.9946	0.9892	0.9840	0.9946	0.9892	0.9995	0.9999	0.50	184	3	833	1	185	18.12
Ramphastos sulfuratus	0.9722	0.9968	0.9844	0.9904	0.9842	0.9873	0.9994	0.9997	0.80	311	3	702	5	316	30.95

Keeping low confidence classifications (confidence < optimal threshold)

Code

# read data
sympatric_allopatric_class <- imp_raven(
  path = "./data/processed/birdnet_classification_outputs/sympatrics_and_allopactrics/sympatrics_and_allopatrics_model/",
  all.data = TRUE,
  warbler.format = TRUE,
  pb = FALSE
)

# keep only test data
test_files <- list.files("/home/m/Dropbox/Projects/bellbird_mimicry/data/processed/birdnet_data/test_data/", full.names = FALSE, recursive = TRUE)

test_files <- basename(gsub("\\\\", "/", test_files))


sympatric_allopatric_class$sound.files <- basename(gsub("\\\\", "/", sympatric_allopatric_class$sound.files))

# all(test_files %in% sympatric_allopatric_class$sound.files)
# 
# setdiff(test_files, sympatric_allopatric_class$sound.files)
 
 
sympatric_allopatric_test <- sympatric_allopatric_class[sympatric_allopatric_class$sound.files %in% test_files, ] 

# filter low confidence classifications
sympatric_allopatric_test$true.positive <- sapply(seq_len(nrow(sympatric_allopatric_test)), function(x){
    sympatric_allopatric_test$Confidence[x] >= mod_eval_symp_allo$Optimal.Threshold[mod_eval_symp_allo$Class == sympatric_allopatric_test$`Common Name`[x]]
    })

sympatric_allopatric_test$`Common Name`[!sympatric_allopatric_test$true.positive] <- "nocall"


# rename column
sympatric_allopatric_test$predicted_sp <- sympatric_allopatric_test$`Common Name`

sympatric_allopatric_test$`Common Name` <- NULL

# read metadata
est <- readRDS(
  "./data/processed/extended_selection_table_sympatric_and_allopatric_species.RDS"
)

# add metadata
sympatric_allopatric_test$sound.files <- basename(gsub("\\\\", "/", sympatric_allopatric_test$sound.files))



sympatric_allopatric_test$vocalizing_sp <- sapply(sympatric_allopatric_test$sound.files, function(x) {
  est$source[paste0(est$sound.files, ".wav") == x][1]
})


# save confusion matrix
conf_mat <- confusionMatrix(
  data = as.factor(sympatric_allopatric_test$predicted_sp),
  reference = factor(sympatric_allopatric_test$vocalizing_sp, levels = levels(as.factor(sympatric_allopatric_test$predicted_sp))
)
)

conf_df_fp <- as.data.frame(conf_mat$table)

conf_df_fp$total <- sapply(conf_df_fp$Reference, function(x) {
  sum(sympatric_allopatric_test$vocalizing_sp == x)
})

conf_df_fp$Proportion <- conf_df_fp$Freq / conf_df_fp$total

conf_df_fp$pred_common_name <- common_names[as.character(conf_df_fp$Prediction)]
conf_df_fp$ref_common_name <- common_names[as.character(conf_df_fp$Reference)]

# sort levels by genus
conf_df_fp$pred_common_name <- factor(
  conf_df_fp$pred_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Chestnut-headed Oropendola",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Pale-mandibled Aracari",
    "Keel-billed Toucan",
    "Choco Toucan",
    "Unassigned"
  )
)

conf_df_fp$ref_common_name <- factor(
  conf_df_fp$ref_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Chestnut-headed Oropendola",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Pale-mandibled Aracari",
    "Keel-billed Toucan",
    "Choco Toucan",
    "Unassigned"
  )
)

ggplot(conf_df_fp[conf_df_fp$ref_common_name != "Unassigned", ],
       aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
    geom_tile() +
    theme_bw() +
    coord_equal() +
    scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
    labs(x = "Reference species", y = "Predicted species") +
    geom_text(aes(label = round(Proportion, 2)), color = "black") +
    theme_classic() +
    theme(axis.text.x = element_text(
        color = "black",
        size = 11,
        angle = 30,
        hjust = 1
    ))

Excluding low confidence classifications (confidence >= optimal threshold)

Code

sympatric_allopatric_test_tp <- sympatric_allopatric_test[sympatric_allopatric_test$true.positive, ]

# save confusion matrix
conf_mat <- confusionMatrix(
  data = as.factor(sympatric_allopatric_test_tp$predicted_sp),
  reference = factor(sympatric_allopatric_test_tp$vocalizing_sp, levels = levels(as.factor(sympatric_allopatric_test_tp$predicted_sp))
)
)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <- sapply(conf_df$Reference, function(x) {
  sum(sympatric_allopatric_test_tp$vocalizing_sp == x)
})

conf_df$Proportion <- conf_df$Freq / conf_df$total

conf_df$pred_common_name <- common_names[as.character(conf_df$Prediction)]
conf_df$ref_common_name <- common_names[as.character(conf_df$Reference)]

# sort levels by genus
conf_df$pred_common_name <- factor(
  conf_df$pred_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Chestnut-headed Oropendola",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Pale-mandibled Aracari",
    "Keel-billed Toucan",
    "Choco Toucan",
    "Unassigned"
  )
)

conf_df$ref_common_name <- factor(
  conf_df$ref_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Chestnut-headed Oropendola",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Pale-mandibled Aracari",
    "Keel-billed Toucan",
    "Choco Toucan",
    "Unassigned"
  )
)

ggplot(conf_df, aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
  geom_tile() +
  theme_bw() +
  coord_equal() +
  scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
  labs(x = "Reference species", y = "Predicted species") +
  geom_text(aes(label = round(Proportion, 2)), color = "black") +
  theme_classic() +
  theme(
    axis.text.x = element_text(
      color = "black",
      size = 11,
      angle = 30,
      hjust = 1
    )
  )

1.1.1 Embeddings

Code

embs <- read.csv("./data/processed/embeddings/embeddings_sympatric_allopatric_spp.csv")

# Split into separate columns
emb_matrix <- do.call(rbind, strsplit(embs$embedding, ","))
colnames(emb_matrix) <- paste0("emb", 1:ncol(emb_matrix))
emb_matrix <- apply(emb_matrix, 2, as.numeric)

# Combine with the first 3 columns
embs <- as.data.frame(cbind(embs[, 1:3], emb_matrix))

# add metadata
embs$file_path <- gsub("Nueva-grabaci\xf3n", "Nueva-grabación", embs$file_path, useBytes = TRUE)
embs$sound.files <- basename(gsub("\\\\", "/", embs$file_path, useBytes = TRUE))

embs$vocalizing_sp <- sapply(embs$sound.files, function(x) {
  est$source[paste0(est$sound.files, ".wav") == x][1]
})

set.seed(123)
tsne_out <- Rtsne(as.matrix(embs[, grep("emb", names(embs))]))

tsne_all <- data.frame(vocalizing_sp = embs$vocalizing_sp, tsne1 = tsne_out$Y[,1], tsne2 = tsne_out$Y[,2])

saveRDS(tsne_all, "./data/processed/tsne_dims_sympatric_and_allopatric_species.rds")

Code

# colors and shapes by species for scatterplots
cols <- c(adjustcolor("tomato", alpha.f = 0.4), mako(4, alpha = 0.4, begin = 0.2, end = 0.8))
cols2 <- c(adjustcolor("tomato", alpha.f = 0.9), mako(4, alpha = 0.9, begin = 0.2, end = 0.8))

cols <- c(cols[1:2], rep(cols[3:5], each = 2))
cols2 <- c(cols2[1:2], rep(cols2[3:5], each = 2))

shapes <- c(16, 16, 16, 1, 17, 2, 15, 0)

names(shapes) <- names(cols) <- names(cols2) <- c(
  "mimic_bellbird",
  "Three-wattled Bellbird",
  "Montezuma Oropendola",
  "Chestnut-headed Oropendola",
  "Collared Aracari",
  "Pale-mandibled Aracari",
  "Keel-billed Toucan",
  "Choco Toucan"
)

tsne_all <- readRDS("./data/processed/tsne_dims_sympatric_and_allopatric_species.rds")

tsne_only_model_species <- tsne_all[tsne_all$vocalizing_sp != "mimic_bellbird", ]

tsne_only_model_species$common_name <- common_names[as.character(tsne_only_model_species$vocalizing_sp)] 

tsne_only_model_species$common_name <- factor(
  tsne_only_model_species$common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Chestnut-headed Oropendola",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Pale-mandibled Aracari",
    "Keel-billed Toucan",
    "Choco Toucan"
  )
)

ggplot(tsne_only_model_species, aes(x = tsne1, y = tsne2, color = common_name, shape = common_name)) +
  geom_point(size = 2) +
  theme_minimal() +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
    scale_color_manual(values = cols[names(cols) %in% unique(tsne_only_model_species$common_name)]) +
    scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_only_model_species$common_name)]) +
    theme_classic()

1.1.2 Focal bellbird classification

Code

# read data
mimic_class <- imp_raven(
  path = "./data/processed/birdnet_classification_outputs/mimic_bellbird/sympatrics_and_allopatrics_model/",
  all.data = TRUE,
  warbler.format = TRUE,
  pb = FALSE
)

# filter low confidence classifications
mimic_class$true.positive <- sapply(seq_len(nrow(mimic_class)), function(x){
    mimic_class$Confidence[x] >= mod_eval_symp_allo$Optimal.Threshold[mod_eval_symp_allo$Class == mimic_class$`Common Name`[x]]
    })

mimic_class$`Common Name`[!mimic_class$true.positive] <- "nocall"



# add metadata
mimic_class$sound.files <- basename(gsub(
  "\\\\",
  "/",
  mimic_class$sound.files
))

# rename column
mimic_class$predicted_sp <- mimic_class$`Common Name`
mimic_class$vocalizing_sp <- "Procnias tricarunculatus"

mimic_class$`Common Name` <- NULL

prop_mimic_class <- table(mimic_class$predicted_sp)

prop_mimic_class <- data.frame(prop_mimic_class)
names(prop_mimic_class) <- c("Prediction", "Freq")
prop_mimic_class$total <- sum(prop_mimic_class$Freq)
prop_mimic_class$Proportion <- prop_mimic_class$Freq / prop_mimic_class$total
prop_mimic_class$Reference <- "Focal\nbellbird"

others <- conf_df_fp[conf_df_fp$Reference == "Procnias tricarunculatus", ]
others$Reference <- "Other\nbellbirds"

prop_mimic_psarocolius <- prop_mimic_class[prop_mimic_class$Prediction == "Psarocolius montezuma",]

prop_mimic_psarocolius$Freq <- 0
prop_mimic_psarocolius$Proportion <- 0

prop_mimic_psarocolius$Prediction <- "Psarocolius guatimozinus" 


# proportions observed in adult bellbird
prop_class <- rbind(
  prop_mimic_class,
  prop_mimic_psarocolius,
  others[, c("Prediction", "Freq", "total", "Proportion", "Reference")]
)




prop_class$genus <- sapply(
  strsplit(as.character(prop_class$Prediction), " "),
  "[[",
  1
)

prop_class$type <- ifelse(
  grepl("guati|eryth|brevis", prop_class$Prediction),
  "Allopatric",
  "Sympatric"
)

prop_class$type[prop_class$genus == "Procnias"] <- "Own\nspecies"

prop_class$type <- factor(
  prop_class$type,
  levels = c("Allopatric", "Sympatric", "Own\nspecies")
)

prop_class$genus <- factor(
  prop_class$genus,
  levels = c("Procnias", "Pteroglossus", "Psarocolius", "Ramphastos")
)

prop_class$Reference <- factor(
  prop_class$Reference,
  levels = c("Other\nbellbirds", "Focal\nbellbird")
)


genus_labels <- c(
  "Procnias" = "Three-wattled bellbird",
  "Psarocolius" = "Oropendolas",
  "Pteroglossus" = "Aracaris",
  "Ramphastos" = "Toucans"
)

prop_class$common_name <- common_names[as.character(prop_class$Prediction)]

prop_class$common_name <- gsub("-", "\n", prop_class$common_name)


prop_class$sp_label <- sapply(
  strsplit(as.character(prop_class$common_name), " "),
  "[[",
  1
)

prop_class$genus[prop_class$sp_label == "Unassigned"] <- "Procnias"

prop_class$sp_label[prop_class$sp_label == "Three\nwattled" & prop_class$sp_label == "Unassigned"] <- ""


prop_class$sp_label_y <- ave(
  prop_class$Proportion,
  prop_class$genus,
  FUN = function(x) {
    x + log10(max(x) + 0.999) * 0.44
  }
)

Code

# settings

base_size <- 14

percent_text_size <- 4
species_text_size <- 4


# GENERA TO PLOT

loop_df <- data.frame(
  gen = c(
    "Procnias",
    "Pteroglossus",
    "Psarocolius",
    "Ramphastos"
  ),
  label = c("A", "B", "C", "D"),
  y_max = c(
  1.3,
  0.057,
  0.004,
  0.16
 )
)


prop_class <- prop_class[order(prop_class$genus, prop_class$Reference),]

prop_class$sp_label_y[prop_class$genus == "Procnias"] <- c(
   0.4, 1.03, 0.9, 0.4
) - 0.05


prop_class$sp_label_y[prop_class$genus == "Pteroglossus"] <- c(
    0.01, 0.01, 0.019, 0.047
) - 0.003

prop_class$sp_label_y[prop_class$genus == "Psarocolius"] <- c(
    0.0004, 0.0014, 0.00255, 0.0004
) + 0.0001
    

prop_class$sp_label_y[prop_class$genus == "Ramphastos"] <- c(
    0.026, 0.02, 0.02, 0.13
) + 0.006

bar_cols <- c("gray", mako(10, alpha = 0.6, begin = 0.2, end = 0.8)[c(2, 8)])

prop_class$prop_label <- paste(
  round(prop_class$Proportion * 100, 1),
  "%\n n = ",
 prop_class$Freq
)

# PANEL FUNCTION
make_panel <- function(gen, label, y_max){

  # subset data
  dat_sub <- subset(prop_class, genus == gen)

  if(gen == "Procnias"){

  dat_sub$type <- factor(
    dat_sub$type,
    levels = c(
        "Own\nspecies",
      "Allopatric",
      "Sympatric"
    )
  )

  }
  
  # exactly 3 tick marks
  y_breaks <- c(
    0,
    y_max / 2,
    y_max
  )

  
    if(gen == "Procnias"){

  fill_scale <- scale_fill_manual(
    name = "",
    values = c(
      "Own\nspecies" = "#28192FB3",
      "Allopatric" = "#28192FB3",     # not used
      "Sympatric" = "#DEF5E5B3"      # bellbird color
    ),
    breaks = "Sympatric"
  )

} else {

  fill_scale <- scale_fill_manual(
    name = "",
    values = c(
      "Own\nspecies" = bar_cols[1],
      "Allopatric" = bar_cols[2],
      "Sympatric" = bar_cols[3]
    ),
    breaks = c(
      "Allopatric",
      "Sympatric"
    )
  )

}
  
  # y-axis formatting
  if(gen == "Psarocolius"){

    y_scale <- scale_y_continuous(
      limits = c(0, y_max),
      breaks = y_breaks,
      labels = function(x) sprintf("%.1f%%", x * 100)
    )

  } else {

    y_scale <- scale_y_continuous(
      limits = c(0, y_max),
      breaks = y_breaks,
      labels = scales::percent
    )

  }

  ggplot(dat_sub, aes(x = Reference, y = Proportion)) +

    geom_bar(
      aes(
        group = interaction(Reference, type),
        fill = type
      ),
      position = position_dodge(),
      stat = "identity"
    ) +

    # percentages
    geom_text(
        lineheight = 0.7,
      aes(
        group = interaction(Reference, type),
        label = dat_sub$prop_label,
      ),
      position = position_dodge(width = 0.9),
      vjust = -0.1,
      size = percent_text_size
    ) +

    # species labels
    geom_text(
      aes(
        group = interaction(Reference, type),
        label = sp_label,
        y = sp_label_y
      ),
      position = position_dodge(width = 0.9),
      angle = 90,
      hjust = 0,
      vjust = 0.5,
      size = species_text_size
    ) +

    facet_wrap(
      ~ genus,
      labeller = labeller(genus = genus_labels)
    ) +

    y_scale +
      fill_scale +

    labs(
      title = label,
      x = NULL
    ) +

    theme_classic(base_size = base_size) +

    theme(

      # legend only in Pteroglossus panel
      legend.position = if(gen == "Pteroglossus"){
        c(0.3, 0.8)
      } else {
        "none"
      },

      legend.background = element_rect(
        fill = "white",
        color = NA
      ),

      axis.title.y = element_blank(),

      panel.grid.major.x = element_blank(),

      strip.background = element_rect(
        fill = "white",
        color = "black"
      ),

      strip.text = element_text(
        size = base_size
      ),

      axis.text.x = element_text(
        size = base_size
      ),

      axis.text.y = element_text(
        size = base_size
      ),

      legend.text = element_text(
        size = base_size
      ),

      plot.title = element_text(
        face = "bold",
        hjust = 0
      ),

      axis.line = element_line(
        linewidth = 0.8
      ),

      axis.ticks = element_line(
        linewidth = 0.8
      )
    )
}


# CREATE PANELS

plot_list <- lapply(
  seq_len(nrow(loop_df)),
  function(i){

    make_panel(
      gen = loop_df$gen[i],
      label = loop_df$label[i],
      y_max = loop_df$y_max[i]
    )

  }
)


# COMBINE PANELS

combined_plot <- plot_grid(
  plotlist = plot_list,
  ncol = 2,
  align = "hv"
)

# ADD SHARED Y-AXIS TITLE

# SHARED Y-AXIS LABEL

y_label <- ggdraw() +
  draw_label(
    "Percentage of vocalizations predicted as",
    angle = 90,
    size = base_size + 2
  )


# FINAL PLOT

final_plot <- plot_grid(
  y_label,
  combined_plot,
  ncol = 2,
  rel_widths = c(0.06, 1)
)


if (!isTRUE(getOption('knitr.in.progress'))) {
ggsave(
  filename = "./scripts/focal_vs_other_bellbirds_prediction.png",
  plot = final_plot,
  width = 10,
  height = 8,
  dpi = 300,
  device = grDevices::png
  ) 
final_plot    
}

1.1.3 Statistical analysis

Evaluate whether the proportion of vocalizations predicted as sympatric (model) species vs allopatric species is significantly higher in the focal bellbird than in other bellbirds

Code

mimic_other_bellbirds <- sympatric_allopatric_test[sympatric_allopatric_test$predicted_sp != "Procnias tricarunculatus", ]

mimic_focal <- mimic_class[mimic_class$predicted_sp != "Procnias tricarunculatus", ]

mimic_focal$vocalizing_type <- "focal"
mimic_other_bellbirds$vocalizing_type <- "others"

mimic_bellbirds <- rbind(
  mimic_focal[, c("predicted_sp", "vocalizing_type")],
  mimic_other_bellbirds[, c("predicted_sp", "vocalizing_type")]
)

mimic_bellbirds$vocalizing_type <- factor(
  mimic_bellbirds$vocalizing_type,
  levels = c("others", "focal")
)

mimic_bellbirds$mimic_type <- 
ifelse(
  grepl("guati|eryth|brevis", mimic_bellbirds$predicted_sp),
  0,
  1
)

mimic_bellbirds$genus <- sapply(
  strsplit(as.character(mimic_bellbirds$predicted_sp), " "),
  "[[",
  1
)

cores <- 4
chains <- 4
iter <- 4000

priors <- c(
  prior(normal(0, 1.5), class = "Intercept"),
  prior(normal(0, 0.7), class = "b"),
  prior(exponential(1), class = "sd")
)

only_prior_fit  <- brm(
  mimic_type ~ vocalizing_type + (1 | genus),
  data = mimic_bellbirds[mimic_bellbirds$predicted_sp != "nocall",],
  family = bernoulli(),
  prior = priors,
  sample_prior = "only",
  chains = chains,
  iter = iter,
  cores = cores,
          file = "./data/processed/mimicry_prob_by_simpatry_model_only_priors.RDS", 
  file_refit = "on_change"
)


mod1 <- brm(
  mimic_type ~ vocalizing_type  + (1 | genus),
  data = mimic_bellbirds[mimic_bellbirds$predicted_sp != "nocall",],
  family = bernoulli(),
  cores = cores,
    prior = priors,
        chains = chains,
        iter = iter,
        file = "./data/processed/mimicry_prob_by_simpatry_model.RDS", 
  file_refit = "on_change"
)

1.1.3.1 Prior check

Priors

Code

prior_summary(mod1)

prior	class	coef	group	lb	source
normal(0, 0.7)	b				user
	b	vocalizing_typefocal			default
normal(0, 1.5)	Intercept				user
exponential(1)	sd			0	user
	sd		genus		default
	sd	Intercept	genus		default

Prior predictive checks

Code

custom_ppc(only_prior_fit, group = "vocalizing_type", ndraws = 100)

1.1.3.2 Fit summary

Code

fill_color <- mako(10)[8]

extended_summary(
  mod1,
  n.posterior = 1000,
  fill = fill_color,
  trace.palette = mako,
  remove.intercepts = FALSE,
  highlight = TRUE,
  print.name = FALSE
)

	priors	formula	iterations	chains	thinning	warmup	diverg_transitions	rhats > 1.05	min_bulk_ESS	min_tail_ESS	seed
1	b-normal(0, 0.7) Intercept-normal(0, 1.5) sd-exponential(1)	mimic_type ~ vocalizing_type + (1 \| genus)	4000	4	1	2000	2 (0.00025%)	0	2382.72	3132.15	637637388

	Estimate	l-95% CI	u-95% CI	Rhat	Bulk_ESS	Tail_ESS
b_Intercept	-0.713	-2.137	0.502	1.001	2382.72	3132.15
b_vocalizing_typefocal	3.153	2.781	3.528	1	4738.91	4324.30

Posterior predictive check

Code

custom_ppc(mod1, group = "vocalizing_type", ndraws = 100)

Code

post <- as_draws_df(mod1)

# Odds ratio: focal vs other birds
focal_vs_others <- exp(post$b_vocalizing_typefocal)

# Predicted probability of sympatric mimicry in focal birds
p_focal <- posterior_epred(
  mod1,
  newdata = data.frame(
    vocalizing_type = "focal",
    genus = unique(mimic_bellbirds$genus)[1]
  ),
  re_formula = NA
)

p_focal <- as.numeric(p_focal)

# Predicted probability of sympatric mimicry in other birds
p_others <- posterior_epred(
  mod1,
  newdata = data.frame(
    vocalizing_type = "others",
    genus = unique(mimic_bellbirds$genus)[1]
  ),
  re_formula = NA
)

p_others <- as.numeric(p_others)

# Within-group sympatric vs allopatric odds
focal_ratio <- p_focal / (1 - p_focal)
other_ratio <- p_others / (1 - p_others)

1.1.3.3 Summary

Focal bellbird vocalizations had 23.43 times greater odds [95% UI: 16.14–34.04] of being classified as the sympatric rather than the allopatric model species compared with vocalizations from other bellbirds (predicted probability of sympatric classification for focal birds = 0.92; for other bellbirds = 0.34).
Within focal bellbird vocalizations, the odds of classification as the sympatric model species were 11.84 times greater [95% UI: 2.77–40.39] than the odds of classification as closely related allopatric species (predicted probability of sympatric classification for focal birds = 0.92; predicted probability of allopatric classification for focal birds = 0.08).
For vocalizations from other bellbirds, the odds of classification as sympatric and allopatric species were similar (odds = 0.51 [95% UI: 0.12–1.65]; predicted probability of sympatric classification for other bellbirds = 0.34; predicted probability of allopatric classification for other bellbirds = 0.66).

2 Only sympatric species

2.1 BirdNET model

Code

mod_eval_symp <- read.csv("./data/processed/birdnet_models/sympatric/sympatric_evaluation.csv")

names(mod_eval_symp) <- gsub("..", " ", names(mod_eval_symp), fixed = TRUE)

print(mod_eval_symp)

Class	Precision 0.5.	Recall 0.5.	F1.Score 0.5.	Precision opt.	Recall opt.	F1.Score opt.	AUPRC	AUROC	Optimal.Threshold	True.Positives	False.Positives	True.Negatives	False.Negatives	Samples	Percentage
OVERALL (Macro-avg)	0.9884	0.9908	0.9896	0.9939	0.9890	0.9914	0.9994	0.9998
Procnias tricarunculatus	0.9783	0.9854	0.9818	1.0000	0.9781	0.9889	0.9994	0.9998	0.60	134	0	434	3	137	23.99
Psarocolius montezuma	0.9848	1.0000	0.9924	0.9848	1.0000	0.9924	0.9985	0.9998	0.15	65	1	505	0	65	11.38
Pteroglossus torquatus	1.0000	0.9811	0.9905	1.0000	0.9811	0.9905	0.9997	1.0000	0.15	52	0	518	1	53	9.28
Ramphastos sulfuratus	0.9906	0.9968	0.9937	0.9906	0.9968	0.9937	0.9998	0.9998	0.50	315	3	252	1	316	55.34

Keeping low confidence classifications (confidence < optimal threshold)

Code

# read data
sympatric_class <- imp_raven(
  path = "./data/processed/birdnet_classification_outputs/sympatrics/sympatrics_model/",
  all.data = TRUE,
  warbler.format = TRUE,
  pb = FALSE
)


# keep only test data
test_files <- list.files("/home/m/Dropbox/Projects/bellbird_mimicry/data/processed/birdnet_data/test_data/", full.names = FALSE, recursive = TRUE)

test_files <- basename(gsub("\\\\", "/", test_files))


sympatric_class$sound.files <- basename(gsub("\\\\", "/", sympatric_class$sound.files))

# all(sympatric_class$sound.files %in% test_files)

# setdiff(test_files, sympatric_class$sound.files)

# setdiff(sympatric_class$sound.files, test_files)

sympatric_class_test <- sympatric_class[sympatric_class$sound.files %in% test_files, ] 


sympatric_class_test$true.positive <- sapply(seq_len(nrow(sympatric_class_test)), function(x){
    sympatric_class_test$Confidence[x] >= mod_eval_symp$Optimal.Threshold[mod_eval_symp$Class == sympatric_class_test$`Common Name`[x]]
    })
    
sympatric_class_test$`Common Name`[!sympatric_class_test$true.positive] <- "nocall"


# rename column
sympatric_class_test$predicted_sp <- sympatric_class_test$`Common Name`

sympatric_class_test$`Common Name` <- NULL

# read metadata
est <- readRDS(
  "./data/processed/extended_selection_table_sympatric_and_allopatric_species.RDS"
)

# add metadata
sympatric_class_test$sound.files <- basename(gsub("\\\\", "/", sympatric_class_test$sound.files))


sympatric_class_test$vocalizing_sp <- sapply(sympatric_class_test$sound.files, function(x) {
  est$source[paste0(est$sound.files, ".wav") == x][1]
})


sympatric_class_test <- sympatric_class_test[sympatric_class_test$vocalizing_sp %in% unique(sympatric_class_test$predicted_sp),]

# save confusion matrix
conf_mat <- confusionMatrix(
  data = as.factor(sympatric_class_test$predicted_sp),
  reference = factor(sympatric_class_test$vocalizing_sp, levels = levels(as.factor(sympatric_class_test$predicted_sp)))
)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <- sapply(conf_df$Reference, function(x) {
  sum(sympatric_class_test$vocalizing_sp == x)
})

conf_df$Proportion <- conf_df$Freq / conf_df$total

conf_df$pred_common_name <- common_names[as.character(conf_df$Prediction)]
conf_df$ref_common_name <- common_names[as.character(conf_df$Reference)]

# sort levels by genus
conf_df$pred_common_name <- factor(
  conf_df$pred_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Keel-billed Toucan",
    "Unassigned"
  )
)

conf_df$ref_common_name <- factor(
  conf_df$ref_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Keel-billed Toucan",
    "Unassigned"
  )
)

ggplot(conf_df[conf_df$ref_common_name != "Unassigned",], aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
  geom_tile() +
  theme_bw() +
  coord_equal() +
  scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
  labs(x = "Reference species", y = "Predicted species") +
  geom_text(aes(label = round(Proportion, 2)), color = "black") +
  theme_classic() +
  theme(
    axis.text.x = element_text(
      color = "black",
      size = 11,
      angle = 30,
      hjust = 1
    )
  )

Excluding low confidence classifications (confidence >= optimal threshold)

Code

# save confusion matrix
conf_mat <- confusionMatrix(
  data = as.factor(sympatric_class_test$predicted_sp[sympatric_class_test$true.positive]),
  reference = factor(sympatric_class_test$vocalizing_sp[sympatric_class_test$true.positive], levels = levels(as.factor(sympatric_class_test$predicted_sp[sympatric_class_test$true.positive])))
)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <- sapply(conf_df$Reference, function(x) {
  sum(sympatric_class_test$vocalizing_sp[sympatric_class_test$true.positive] == x)
})

conf_df$Proportion <- conf_df$Freq / conf_df$total

conf_df$pred_common_name <- common_names[as.character(conf_df$Prediction)]
conf_df$ref_common_name <- common_names[as.character(conf_df$Reference)]

# sort levels by genus
conf_df$pred_common_name <- factor(
  conf_df$pred_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Keel-billed Toucan"
  )
)

conf_df$ref_common_name <- factor(
  conf_df$ref_common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Montezuma Oropendola",
    "Collared Aracari",
    "Keel-billed Toucan"
  )
)

ggplot(conf_df, aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
  geom_tile() +
  theme_bw() +
  coord_equal() +
  scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
  labs(x = "Reference species", y = "Predicted species") +
  geom_text(aes(label = round(Proportion, 2)), color = "black") +
  theme_classic() +
  theme(
    axis.text.x = element_text(
      color = "black",
      size = 11,
      angle = 30,
      hjust = 1
    )
  )

Code

# read data
sympatric_focal <- imp_raven(
  path = "./data/processed/birdnet_classification_outputs/mimic_bellbird/sympatric_model/",
  all.data = TRUE,
  warbler.format = TRUE,
  pb = FALSE
)

# filter out low confidence calls
sympatric_focal$true.positive <- sapply(seq_len(nrow(sympatric_focal)), function(x){
    if (sympatric_focal$`Common Name`[x] == "nocall")
        return(FALSE)
        
    sympatric_focal$Confidence[x] >= mod_eval_symp$Optimal.Threshold[mod_eval_symp$Class == sympatric_focal$`Common Name`[x]]
    })



sympatric_focal$Reference <- "Focal\nbellbird"

sympatric_focal_unfiltered <- sympatric_focal

sympatric_focal$`Common Name`[!sympatric_focal$true.positive] <- "nocall"

# rename column
sympatric_focal$predicted_sp <- sympatric_focal$`Common Name`

sympatric_focal$`Common Name` <- NULL

# get other bellbirds from previous data
sympatric_others <- sympatric_class_test[sympatric_class_test$vocalizing_sp == "Procnias tricarunculatus", ]

sympatric_others$vocalizing_sp <- NULL

sympatric_others$Reference <- "Other\nbellbirds"

sympatric <- rbind(
  sympatric_focal,
  sympatric_others
)

prop_sympatric <- table(sympatric$predicted_sp, sympatric$Reference)

prop_sympatric <- data.frame(prop_sympatric)
names(prop_sympatric) <- c("Prediction", "Vocalizing", "Freq")

# add Psarocolius
# prop_sympatric2 <- prop_sympatric[3:4, ]
# prop_sympatric2$Prediction <- "Psarocolius montezuma"
# prop_sympatric2$Freq <- 0

# prop_sympatric <- rbind(prop_sympatric, prop_sympatric2)

prop_sympatric$total <- sapply(prop_sympatric$Vocalizing, function(x) sum(prop_sympatric$Freq[prop_sympatric$Vocalizing == x]))

prop_sympatric$Proportion <- prop_sympatric$Freq / prop_sympatric$total

prop_sympatric$common_name <- common_names[as.character(
  prop_sympatric$Prediction
)]

# order levels
prop_sympatric$common_name <- factor(
  prop_sympatric$common_name,
  levels = c(
    "Three-wattled Bellbird",
    "Collared Aracari",
    "Keel-billed Toucan",
    "Montezuma Oropendola",
    "Unassigned"
  )
)

Code

# prepare data

prop_sympatric$panel <- as.character(prop_sympatric$common_name)

prop_sympatric$panel[
  prop_sympatric$common_name %in%
    c("Three-wattled Bellbird", "Unassigned")
] <- "Three-wattled Bellbird"


prop_sympatric$Vocalizing <- factor(
  prop_sympatric$Vocalizing,
  levels = c("Other\nbellbirds", "Focal\nbellbird")
)

# User settings

base_size <- 14
percent_text_size <- 4

loop_df <- data.frame(
  panel = c(
    "Three-wattled Bellbird",
    "Collared Aracari",
    "Montezuma Oropendola",
    "Keel-billed Toucan"
  ),
  label = c(
    "A",
    "B",
    "C",
    "D"
  ),
  y_max = c(
    1.4,
    0.1,
    0.022,
    0.27
  )
)


prop_sympatric$prop_label <- paste(
  round(prop_sympatric$Proportion * 100, 1),
  "%\n n = ",
 prop_sympatric$Freq
)


prop_sympatric$sp_label <- ""

prop_sympatric$sp_label[prop_sympatric$panel == "Three-wattled Bellbird"] <- as.character(prop_sympatric$common_name[prop_sympatric$panel == "Three-wattled Bellbird"])

prop_sympatric$sp_label <- gsub("Three-wattled Bellbird", "Three\nwattled", prop_sympatric$sp_label)

prop_sympatric$sp_label <- as.character(prop_sympatric$sp_label)

prop_sympatric$sp_label_y <- prop_sympatric$Proportion + 0.18


# panel function

make_panel <- function(panel_name, label, y_max){

  dat_sub <- subset(
    prop_sympatric, subset = 
        prop_sympatric$panel == panel_name
  )

  y_breaks <- c(
    0,
    y_max / 2,
    y_max
  )

  if(panel_name == "Montezuma Oropendola"){

    y_scale <- scale_y_continuous(
      limits = c(0, y_max),
      breaks = y_breaks,
      labels = function(x) sprintf("%.1f%%", x * 100)
    )

  } else {

    y_scale <- scale_y_continuous(
      limits = c(0, y_max),
      breaks = y_breaks,
      labels = scales::percent
    )

  }

    if(panel_name == "Three-wattled Bellbird"){

  fill_scale <- scale_fill_manual(
    name = "",
    values = c(
      "Unassigned" = "#DEF5E5B3",
      "Three-wattled Bellbird" = "#28192FB3"      # bellbird color
    ),
    breaks = "Sympatric"
  ) 
} else {

  fill_scale <- scale_fill_manual(
    name = "",
    values = c(bar_cols[3]),
  )
}
  
  ggplot(
    dat_sub,
    aes(
      x = Vocalizing,
      y = Proportion,
      fill = common_name
    )
  )  +
    geom_text(
      aes(
        group = interaction(Vocalizing, common_name),
        label = sp_label,
        y = sp_label_y
      ),
      position = position_dodge(width = 0.9),
      angle = 90,
      hjust = 0,
      vjust = 0.5,
      size = species_text_size
    ) +
    geom_bar(
      stat = "identity",
      position = position_dodge(width = 0.9)
    ) +
      
    geom_text(
          lineheight = 0.7,
      aes(
        label = dat_sub$prop_label
      ),
      position = position_dodge(width = 0.9),
      vjust = -0.1,
      size = percent_text_size
    ) +

    facet_wrap(
      ~ panel,
      scales = "free_x"
    ) +

    y_scale +

    fill_scale +

    labs(
      title = label,
      x = NULL
    ) +

    theme_classic(base_size = base_size) +

    theme(

      legend.position = "none",

      legend.background = element_rect(
        fill = "white",
        color = NA
      ),

      axis.title.y = element_blank(),

      strip.background = element_rect(
        fill = "white",
        color = "black"
      ),

      strip.text = element_text(
        size = base_size
      ),

      axis.text.y = element_text(
        size = base_size
      ),

      legend.text = element_text(
        size = base_size
      ),

      plot.title = element_text(
        face = "bold",
        hjust = 0
      ),

      axis.line = element_line(
        linewidth = 0.8
      ),

      axis.ticks = element_line(
        linewidth = 0.8
      )
    )
}


# Create panels

plot_list <- lapply(
  seq_len(nrow(loop_df)),
  function(i){

    make_panel(
      panel_name = loop_df$panel[i],
      label = loop_df$label[i],
      y_max = loop_df$y_max[i]
    )

  }
)


# Combine panels
combined_plot <- plot_grid(
  plotlist = plot_list,
  ncol = 2,
  align = "hv"
)

y_label <- ggdraw() +
  draw_label(
    "Percentage of vocalizations predicted as",
    angle = 90,
    size = base_size
  )


# Final plot
final_plot <- plot_grid(
  y_label,
  combined_plot,
  ncol = 2,
  rel_widths = c(0.06, 1)
)

if (!isTRUE(getOption('knitr.in.progress'))) {
ggsave(
  filename = "./scripts/focal_vs_other_bellbirds_sympatric_prediction.png",
  plot = final_plot,
  width = 10,
  height = 8,
  dpi = 300,
  device = grDevices::png
  ) 
    
final_plot    
}

Predictions from the sympatric species only model. Bars show the proportion of vocalizations from focal bellbirds and other bellbirds assigned by a bird-sound classifier to model species. (A) Vocalizations classified as three-wattled bellbirds (Procnias tricarunculatus) or left unassigned. (B–D) Vocalizations classified as aracaris, oropendolas, and toucans.

2.1.1 Statistical analysis

Code

sympatric$prediction_type <- ifelse(
  grepl("rocnias", sympatric$predicted_sp),
  0,
  1
)

sympatric$vocalizing_type <- ifelse(
  grepl("ocal", sympatric$Reference),
  "focal",
  "others"
)

# order levels
sympatric$vocalizing_type <- factor(
  sympatric$vocalizing_type,
  levels = c("others", "focal")
)

cores <- 4
chains <- 4
iter <- 4000

priors <- c(
  prior(normal(-3, 1), class = "Intercept"),
  prior(normal(0, 2), class = "b")
)

only_prior_fit2  <- brm(
  prediction_type ~ vocalizing_type + predicted_sp,
  data = sympatric[sympatric$predicted_sp != "nocall",],
  family = bernoulli(),
  prior = priors,
sample_prior = "only",
  chains = chains,
  iter = iter,
  cores = cores,
  file = "./data/processed/mimicry_simpatric_only_model_only_priors.RDS",
  file_refit = "on_change"
)


mod2 <- brm(
  prediction_type ~ vocalizing_type,
  data = sympatric[sympatric$predicted_sp != "nocall",],
  family = bernoulli(),
  prior = priors,
  chains = chains,
  iter = iter,
  cores = cores,
  control = list(
  adapt_delta = 0.999,
  max_treedepth = 15
),
  file = "./data/processed/mimicry_simpatric_only_model.RDS",
  file_refit = "on_change"
)

2.1.1.1 Prior check

Priors

Code

prior_summary(mod2)

prior	class	coef	source
normal(0, 2)	b		user
	b	vocalizing_typefocal	default
normal(-3, 1)	Intercept		user

Prior predictive checks

Code

custom_ppc(only_prior_fit2, group = "vocalizing_type", ndraws = 100)

2.1.2 Fit summary

Code

fill_color <- mako(10)[8]

extended_summary(
  mod2,
  n.posterior = 1000,
  fill = fill_color,
  trace.palette = mako,
  remove.intercepts = FALSE,
  highlight = TRUE,
  print.name = FALSE
)

	priors	formula	iterations	chains	thinning	warmup	diverg_transitions	rhats > 1.05	min_bulk_ESS	min_tail_ESS	seed
1	b-normal(0, 2) Intercept-normal(-3, 1)	prediction_type ~ vocalizing_type	4000	4	1	2000	0 (0%)	0	1528.37	1785.78	2052028757

	Estimate	l-95% CI	u-95% CI	Rhat	Bulk_ESS	Tail_ESS
b_Intercept	-5.140	-7.251	-3.595	1.002	1528.37	1785.78
b_vocalizing_typefocal	5.417	3.869	7.528	1.001	1551.31	1853.54

Posterior predictive check

Code

custom_ppc(mod1, group = "vocalizing_type", ndraws = 100)

Code

# Odds ratio: focal vs other bellbirds
post2 <- as_draws_df(mod2)

# Odds ratio: focal vs other bellbirds
focal_vs_others <- exp(post2$b_vocalizing_typefocal)

# Predicted probability of sympatric classification for focal birds
p_focal <- posterior_epred(
  mod2,
  newdata = data.frame(vocalizing_type = "focal")
)

p_focal <- as.numeric(p_focal)

# Predicted probability of sympatric classification for other bellbirds
p_others <- posterior_epred(
  mod2,
  newdata = data.frame(vocalizing_type = "others")
)

p_others <- as.numeric(p_others)

2.1.2.1 Summary

Focal bellbird vocalizations had 203.89 times greater odds [95% UI: 47.92–1859.37] of being classified as a sympatric model species compared with vocalizations from other bellbirds (predicted probability of sympatric classification for focal birds = 0.57; for other bellbirds = 0.01).

3 Mimicry through time

Code

mimic_data <- est[est$source == "mimic_bellbird", ]

rec_info <- read.table("./data/raw/Recording_dates_.csv", head = TRUE, sep = ";")

rec_info$date <- gsub("\\/", "-", rec_info$date)
rec_info$date <- gsub("out", "ago", rec_info$date)
rec_info$date <- gsub("set", "sep", rec_info$date)

rec_info$org.date <- rec_info$date
rec_info$date <- as.Date(rec_info$date, format = "%d-%b-%y")


mimic_data$orig.sound.files <- attr(mimic_data, "check.res")$orig.sound.files


mimic_data$date <- sapply(mimic_data$orig.sound.files, function(x) as.character(rec_info$date[rec_info$sound.files ==
    x][1]))

mimic_data$date <- as.Date(mimic_data$date)

sympatric_focal_unfiltered$fixed.sound.files <- gsub("Nueva-grabaci\xf3n", "Nueva-grabación", sympatric_focal_unfiltered$sound.files, useBytes = TRUE)

sympatric_focal_unfiltered$fixed.sound.files <- basename(gsub("\\\\", "/", sympatric_focal_unfiltered$fixed.sound.files, useBytes = TRUE))

mimic_data$sound.files <- gsub("Nueva-grabaci\xf3n", "Nueva-grabación", mimic_data$sound.files, useBytes = TRUE)


mimic_data$sound.files <- paste0(mimic_data$sound.files, ".wav")

mimic_data$pred_mimic <- sapply(mimic_data$sound.files, function(x) {
  sympatric_focal_unfiltered$`Common Name`[sympatric_focal_unfiltered$fixed.sound.files == x][1]
})

# mimic_data <- mimic_data[mimic_data$pred_mimic != "babbling",]

# mimic_data$pred_mimic <- ifelse(is.na(mimic_data$pred_mimic), "nocall", mimic_data$pred_mimic)

agg_pred <- aggregate(orig.sound.files ~ date + pred_mimic, mimic_data, length)


out <- lapply(unique(agg_pred$date), function(x) {

    X <- agg_pred[agg_pred$date == x, ]
    X$prop <- X$orig.sound.files/sum(X$orig.sound.files)

    if (length(unique(X$pred_mimic)) < length(unique(mimic_data$pred_mimic))) {

        new_df <- data.frame(date = X$date[1], pred_mimic = setdiff(c("Procnias tricarunculatus",
            "Pteroglossus torquatus", "Ramphastos sulfuratus", "Psarocolius montezuma", "nocall"), X$pred_mimic),
            orig.sound.files = 0, prop = 0)
  
        X <- rbind(new_df, X)
    }
    
      X$total <- sum(X$orig.sound.files)
    return(X)
})

prop_pred <- do.call(rbind, out)

prop_pred$date.fact <- as.factor(as.character(prop_pred$date))

count_date <- aggregate(orig.sound.files ~ date, prop_pred, sum)

prop_pred$pred_common_name <- common_names[as.character(prop_pred$pred_mimic)]

dat <- prop_pred[prop_pred$pred_mimic != "Psarocolius montezuma",]

dat$julian_day <- as.numeric(format(as.Date(dat$date), "%j"))


dat$pred_common_name <- common_names[as.character(dat$pred_mimic)]


dat$pred_common_name <- factor(dat$pred_common_name, levels = c( "Three-wattled Bellbird", "Collared Aracari",     "Keel-billed Toucan", "Unassigned"))

3.1 Statistical analysis

We modeled the probability of producing original sounds as a function of Julian day while allowing both intercepts and temporal slopes to vary among mimic categories (pred_mimic). Rather than fitting a full fixed interaction between date and mimic category, we used a hierarchical varying-slope structure to account for heterogeneity among mimic groups while enabling partial pooling across categories. This approach reduces overparameterization and yields more stable estimates, particularly for mimic groups with limited sample sizes, while still allowing different groups to exhibit distinct temporal trajectories.

Code

# weakly informative priors
priors <- c(
# Weakly informative logistic intercept
prior(normal(0, 2.5), class = "Intercept"),
# Temporal slope
prior(normal(0, 1), class = "b"),
# Random effect standard deviations
prior(exponential(1), class = "sd")
)

mod2 <- brm(
  orig.sound.files | trials(total) ~ 
    scale(julian_day) +
    (1 + scale(julian_day) | pred_common_name),
  family = binomial,
  data = dat,
  prior = priors,
  chains = 4,
  cores = 4,
  iter = 4000,
  control = list(adapt_delta = 0.95), file_refit = "on_change",
  file = "data/processed/brms_model_prop_pred_mimicry.rds"
)

# mod2 <- add_criterion(
#       mod2,
#       criterion = "loo")


only_prior_mod2 <- brm(
  orig.sound.files | trials(total) ~ 
    scale(julian_day) +
    (1 + scale(julian_day) | pred_common_name),
  family = binomial,
  data = dat,
  prior = priors,
  sample_prior = "only",
  chains = 4,
  cores = 4,
  iter = 4000,
  control = list(adapt_delta = 0.95), file_refit = "on_change",
  file = "data/processed/brms_model_prop_pred_mimicry_only_prior.rds"
)

3.1.1 Prior check

Priors

Code

prior_summary(mod2)

prior	class	coef	group	lb	source
normal(0, 1)	b				user
	b	scalejulian_day			default
normal(0, 2.5)	Intercept				user
lkj_corr_cholesky(1)	L				default
	L		pred_common_name		default
exponential(1)	sd			0	user
	sd		pred_common_name		default
	sd	Intercept	pred_common_name		default
	sd	scalejulian_day	pred_common_name		default

Prior predictive checks

Code

custom_ppc(only_prior_mod2, group = "vocalizing_type", ndraws = 100)

3.1.2 Fit summary

Code

fill_color <- mako(10)[8]

extended_summary(
  mod2,
  n.posterior = 1000,
  fill = fill_color,
  trace.palette = mako,
  remove.intercepts = FALSE,
  highlight = TRUE,
  print.name = FALSE
)

	priors	formula	iterations	chains	thinning	warmup	diverg_transitions	rhats > 1.05	min_bulk_ESS	min_tail_ESS	seed
1	b-normal(0, 1) Intercept-normal(0, 2.5) L-lkj_corr_cholesky(1) sd-exponential(1)	orig.sound.files \| trials(total) ~ scale(julian_day) + (1 + scale(julian_day) \| pred_common_name)	4000	4	1	2000	2 (0.00025%)	0	2720.74	3719.75	263265829

	Estimate	l-95% CI	u-95% CI	Rhat	Bulk_ESS	Tail_ESS
b_Intercept	-1.704	-4.106	0.708	1	2720.74	3719.75
b_scalejulian_day	-0.140	-1.097	0.816	1	3674.43	4492.18

Posterior predictive check

Code

custom_ppc(mod2, group = "vocalizing_type", ndraws = 100)

Code

# Extract posterior draws as base R data.frame
post <- as.data.frame(mod2)

# Get species levels
species_levels <- unique(mod2$data$pred_common_name)

# Global slope
beta_global <- post[, "b_scalejulian_day"]
# Find all random slope columns automatically
rand_cols <- grep("^r_pred_common_name\\[.*,scalejulian_day\\]$",
names(post),
value = TRUE)
# Extract species names from column names
species_levels <- sub("r_pred_common_name\\[(.*),scalejulian_day\\]", "\\1", rand_cols)


# Storage matrix
results <- matrix(NA, nrow = length(species_levels), ncol = 3)
colnames(results) <- c("mean_slope",
                       "l95",
                       "u95")

rownames(results) <- species_levels


# Loop through species
for (i in seq_along(rand_cols)) {

  slope_draws <- beta_global + post[, rand_cols[i]]

  results[i, "mean_slope"]    <- mean(slope_draws)
  results[i, "l95"]           <- quantile(slope_draws, 0.025)
  results[i, "u95"]           <- quantile(slope_draws, 0.975)
}

# Print results
print(round(results, 4), row.names = TRUE)

	mean_slope	l95	u95
Three-wattled.Bellbird	0.7808	0.6627	0.9048
Collared.Aracari	-0.1110	-0.2725	0.0511
Keel-billed.Toucan	-0.7156	-0.8308	-0.6031
Unassigned	-0.8273	-3.2819	1.3958

Code

# Create prediction grid
newdata <- expand.grid(
  julian_day = seq(min(dat$julian_day),
                   max(dat$julian_day),
                   length.out = 100),
  pred_common_name = unique(dat$pred_common_name)
)

# IMPORTANT: match scaling used in model
newdata$scalejulian_day <- scale(dat$julian_day)[1]  # dummy to get attributes
newdata$scalejulian_day <- (newdata$julian_day - mean(dat$julian_day)) /
                           sd(dat$julian_day)

newdata$total <- round(mean(dat$total), 0)  # use mean total for predictions

# Get posterior expected probabilities
epred <- posterior_epred(
  mod2,
  newdata = newdata,
  re_formula = NULL
)

# Posterior mean prediction
newdata$fit <- colMeans(epred)

# Convert julian day back to date for plotting
origin_year <- format(as.Date(dat$date[1]), "%Y")
newdata$date <- as.Date(newdata$julian_day - 1,
                        origin = paste0(origin_year, "-01-01"))

newdata$fit_prop <- newdata$fit / 100

# include only those for which CI did not include 0
results_df <- as.data.frame(results)
sig_spp <- rownames(results_df)[results_df$l95 * results_df$u95 > 0]

newdata <- newdata[newdata$pred_common_name %in% c("Three-wattled Bellbird", "Collared Aracari",     "Keel-billed Toucan", "Unassigned"), ]


# Plot
ggplot(
  dat[dat$prop > 0, ],
  aes(
    x = as.Date(date),
    y = prop,
    fill = pred_common_name,
    col = pred_common_name,
    size = orig.sound.files
  )
) +
  geom_point(pch = 21) +
  geom_line(
    data = newdata,
    aes(x = date, y = fit_prop, color = pred_common_name),
    size = 1.2
  ) +
  scale_fill_viridis_d(alpha = 0.6, end = 0.8) +
  scale_color_viridis_d(alpha = 0.8, end = 0.8) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) +
  labs(
    x = "Date",
    y = "Percentage of vocalizations\npredicted as",
    fill = "Predicted species",
    color = "Predicted species",
    size = "Number of recorded\nvocalizations"
  ) +
 scale_y_continuous(
    labels = scales::percent)

3.1.3 Summary

The proportion of vocalizations of its own species increased true time while the proportion for Killed-billed Toucan and unassigned calls decreased
Proportions for Collared Araraci did not show a definitive pattern

Session information

─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.5.2 (2025-10-31)
 os       Ubuntu 22.04.4 LTS
 system   x86_64, linux-gnu
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Costa_Rica
 date     2026-06-11
 pandoc   3.6.3 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
 quarto   1.8.25 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/quarto

─ Packages ───────────────────────────────────────────────────────────────────
 package        * version    date (UTC) lib source
 abind            1.4-8      2024-09-12 [1] CRAN (R 4.5.2)
 ape              5.8-1      2024-12-16 [1] CRAN (R 4.5.2)
 arrayhelpers     1.1-0      2020-02-04 [1] CRAN (R 4.5.2)
 backports        1.5.1      2026-04-03 [1] CRAN (R 4.5.2)
 bayesplot        1.15.0     2025-12-12 [1] CRAN (R 4.5.2)
 bitops           1.0-9      2024-10-03 [1] CRAN (R 4.5.2)
 bridgesampling   1.2-1      2025-11-19 [1] CRAN (R 4.5.2)
 brio             1.1.5      2024-04-24 [1] CRAN (R 4.5.2)
 brms           * 2.23.0     2025-09-09 [1] CRAN (R 4.5.2)
 brmsish        * 1.0.0      2026-03-03 [1] Github (maRce10/brmsish@81ab826)
 Brobdingnag      1.2-9      2022-10-19 [1] CRAN (R 4.5.2)
 cachem           1.1.0      2024-05-16 [1] CRAN (R 4.5.2)
 caret          * 7.0-1      2024-12-10 [1] CRAN (R 4.5.2)
 checkmate        2.3.4      2026-02-03 [1] CRAN (R 4.5.2)
 class            7.3-23     2025-01-01 [1] CRAN (R 4.5.2)
 cli              3.6.6      2026-04-09 [1] CRAN (R 4.5.2)
 coda             0.19-4.1   2024-01-31 [1] CRAN (R 4.5.2)
 codetools        0.2-20     2024-03-31 [1] CRAN (R 4.5.2)
 cowplot        * 1.2.0      2025-07-07 [1] CRAN (R 4.5.2)
 crayon           1.5.3      2024-06-20 [1] CRAN (R 4.5.2)
 curl             7.0.0      2025-08-19 [1] CRAN (R 4.5.2)
 data.table       1.18.2.1   2026-01-27 [1] CRAN (R 4.5.2)
 devtools         2.4.6      2025-10-03 [1] CRAN (R 4.5.2)
 digest           0.6.39     2025-11-19 [1] CRAN (R 4.5.2)
 distributional   0.6.0      2026-01-14 [1] CRAN (R 4.5.2)
 doParallel     * 1.0.17     2022-02-07 [1] CRAN (R 4.5.2)
 dplyr            1.2.1      2026-04-03 [1] CRAN (R 4.5.2)
 dtw              1.23-1     2022-09-19 [1] CRAN (R 4.5.2)
 e1071            1.7-17     2025-12-18 [1] CRAN (R 4.5.2)
 ellipsis         0.3.2      2021-04-29 [3] CRAN (R 4.1.1)
 evaluate         1.0.5      2025-08-27 [1] CRAN (R 4.5.2)
 farver           2.1.2      2024-05-13 [1] CRAN (R 4.5.2)
 fastmap          1.2.0      2024-05-15 [1] CRAN (R 4.5.2)
 fftw             1.0-9      2024-09-20 [1] CRAN (R 4.5.2)
 foreach        * 1.5.2      2022-02-02 [3] CRAN (R 4.1.2)
 fs               2.0.1      2026-03-24 [1] CRAN (R 4.5.2)
 future           1.70.0     2026-03-14 [1] CRAN (R 4.5.2)
 future.apply     1.20.2     2026-02-20 [1] CRAN (R 4.5.2)
 generics         0.1.4      2025-05-09 [1] CRAN (R 4.5.2)
 ggdist           3.3.3      2025-04-23 [1] CRAN (R 4.5.2)
 ggplot2        * 4.0.2      2026-02-03 [1] CRAN (R 4.5.2)
 globals          0.19.1     2026-03-13 [1] CRAN (R 4.5.2)
 glue             1.8.0      2024-09-30 [1] CRAN (R 4.5.2)
 gower            1.0.2      2024-12-17 [1] CRAN (R 4.5.2)
 gridExtra        2.3        2017-09-09 [3] CRAN (R 4.0.1)
 gtable           0.3.6      2024-10-25 [1] CRAN (R 4.5.2)
 hardhat          1.4.2      2025-08-20 [1] CRAN (R 4.5.2)
 htmltools        0.5.9      2025-12-04 [1] CRAN (R 4.5.2)
 htmlwidgets      1.6.4      2023-12-06 [1] CRAN (R 4.5.2)
 httr             1.4.8      2026-02-13 [1] CRAN (R 4.5.2)
 inline           0.3.21     2025-01-09 [1] CRAN (R 4.5.2)
 ipred            0.9-15     2024-07-18 [1] CRAN (R 4.5.2)
 iterators      * 1.0.14     2022-02-05 [3] CRAN (R 4.1.2)
 jsonlite         2.0.0      2025-03-27 [1] CRAN (R 4.5.2)
 kableExtra     * 1.4.0      2024-01-24 [1] CRAN (R 4.5.2)
 knitr          * 1.51       2025-12-20 [1] CRAN (R 4.5.2)
 labeling         0.4.3      2023-08-29 [1] CRAN (R 4.5.2)
 lattice        * 0.22-9     2026-02-09 [1] CRAN (R 4.5.2)
 lava             1.9.0      2026-04-05 [1] CRAN (R 4.5.2)
 lifecycle        1.0.5      2026-01-08 [1] CRAN (R 4.5.2)
 listenv          0.10.1     2026-03-10 [1] CRAN (R 4.5.2)
 loo              2.9.0      2025-12-23 [1] CRAN (R 4.5.2)
 lubridate        1.9.5      2026-02-04 [1] CRAN (R 4.5.2)
 magrittr         2.0.5      2026-04-04 [1] CRAN (R 4.5.2)
 MASS             7.3-65     2025-02-28 [1] CRAN (R 4.5.2)
 Matrix           1.7-4      2025-08-28 [1] CRAN (R 4.5.2)
 matrixStats      1.5.0      2025-01-07 [1] CRAN (R 4.5.2)
 memoise          2.0.1      2021-11-26 [3] CRAN (R 4.1.2)
 ModelMetrics     1.2.2.2    2020-03-17 [3] CRAN (R 4.0.1)
 mvtnorm          1.3-3      2025-01-10 [1] CRAN (R 4.5.2)
 NatureSounds     1.0.5      2025-01-17 [1] CRAN (R 4.5.2)
 nlme             3.1-168    2025-03-31 [1] CRAN (R 4.5.2)
 nnet             7.3-20     2025-01-01 [1] CRAN (R 4.5.2)
 otel             0.2.0      2025-08-29 [1] CRAN (R 4.5.2)
 packrat          0.9.3      2025-06-16 [1] CRAN (R 4.5.2)
 parallelly       1.46.1     2026-01-08 [1] CRAN (R 4.5.2)
 pbapply          1.7-4      2025-07-20 [1] CRAN (R 4.5.2)
 pillar           1.11.1     2025-09-17 [1] CRAN (R 4.5.2)
 pkgbuild         1.4.8      2025-05-26 [1] CRAN (R 4.5.2)
 pkgconfig        2.0.3      2019-09-22 [3] CRAN (R 4.0.1)
 pkgload          1.5.1      2026-04-01 [1] CRAN (R 4.5.2)
 plyr             1.8.9      2023-10-02 [1] CRAN (R 4.5.2)
 posterior        1.6.1      2025-02-27 [1] CRAN (R 4.5.2)
 pROC             1.19.0.1   2025-07-31 [1] CRAN (R 4.5.2)
 prodlim          2026.03.11 2026-03-11 [1] CRAN (R 4.5.2)
 proxy            0.4-29     2025-12-29 [1] CRAN (R 4.5.2)
 purrr            1.2.2      2026-04-10 [1] CRAN (R 4.5.2)
 QuickJSR         1.9.0      2026-01-25 [1] CRAN (R 4.5.2)
 R6               2.6.1      2025-02-15 [1] CRAN (R 4.5.2)
 RColorBrewer     1.1-3      2022-04-03 [1] CRAN (R 4.5.2)
 Rcpp           * 1.1.1      2026-01-10 [1] CRAN (R 4.5.2)
 RcppParallel     5.1.11-2   2026-03-05 [1] CRAN (R 4.5.2)
 RCurl            1.98-1.17  2025-03-22 [1] CRAN (R 4.5.2)
 recipes          1.3.1      2025-05-21 [1] CRAN (R 4.5.2)
 remotes          2.5.0      2024-03-17 [1] CRAN (R 4.5.2)
 reshape2         1.4.5      2025-11-12 [1] CRAN (R 4.5.2)
 rjson            0.2.23     2024-09-16 [1] CRAN (R 4.5.2)
 rlang            1.2.0      2026-04-06 [1] CRAN (R 4.5.2)
 rmarkdown        2.31       2026-03-26 [1] CRAN (R 4.5.2)
 rpart            4.1.24     2025-01-07 [1] CRAN (R 4.5.2)
 Rraven         * 1.0.16     2025-10-23 [1] CRAN (R 4.5.2)
 rstan            2.32.7     2025-03-10 [1] CRAN (R 4.5.2)
 rstantools       2.6.0      2026-01-10 [1] CRAN (R 4.5.2)
 rstudioapi       0.18.0     2026-01-16 [1] CRAN (R 4.5.2)
 Rtsne          * 0.17       2023-12-07 [1] CRAN (R 4.5.2)
 S7               0.2.1      2025-11-14 [1] CRAN (R 4.5.2)
 scales           1.4.0      2025-04-24 [1] CRAN (R 4.5.2)
 seewave          2.2.4      2025-08-19 [1] CRAN (R 4.5.2)
 sessioninfo      1.2.3      2025-02-05 [1] CRAN (R 4.5.2)
 signal           1.8-1      2024-06-26 [1] CRAN (R 4.5.2)
 sketchy          1.0.7      2026-03-03 [1] CRANs (R 4.5.2)
 StanHeaders      2.32.10    2024-07-15 [1] CRAN (R 4.5.2)
 stringi          1.8.7      2025-03-27 [1] CRAN (R 4.5.2)
 stringr          1.6.0      2025-11-04 [1] CRAN (R 4.5.2)
 survival         3.8-6      2026-01-16 [1] CRAN (R 4.5.2)
 svglite          2.2.2      2025-10-21 [1] CRAN (R 4.5.2)
 svUnit           1.0.8      2025-08-26 [1] CRAN (R 4.5.2)
 systemfonts      1.3.1      2025-10-01 [1] CRAN (R 4.5.2)
 tensorA          0.36.2.1   2023-12-13 [1] CRAN (R 4.5.2)
 testthat         3.3.2      2026-01-11 [1] CRAN (R 4.5.2)
 textshaping      1.0.4      2025-10-10 [1] CRAN (R 4.5.2)
 tibble           3.3.1      2026-01-11 [1] CRAN (R 4.5.2)
 tidybayes        3.0.7      2024-09-15 [1] CRAN (R 4.5.2)
 tidyr            1.3.2      2025-12-19 [1] CRAN (R 4.5.2)
 tidyselect       1.2.1      2024-03-11 [1] CRAN (R 4.5.2)
 timechange       0.4.0      2026-01-29 [1] CRAN (R 4.5.2)
 timeDate         4052.112   2026-01-28 [1] CRAN (R 4.5.2)
 tuneR            1.4.7      2024-04-17 [1] CRAN (R 4.5.2)
 usethis          3.2.1      2025-09-06 [1] CRAN (R 4.5.2)
 vctrs            0.7.3      2026-04-11 [1] CRAN (R 4.5.2)
 viridis        * 0.6.5      2024-01-29 [1] CRAN (R 4.5.2)
 viridisLite    * 0.4.3      2026-02-04 [1] CRAN (R 4.5.2)
 warbleR          1.1.37     2025-10-22 [1] CRAN (R 4.5.2)
 withr            3.0.2      2024-10-28 [1] CRAN (R 4.5.2)
 xaringanExtra    0.8.0      2024-05-19 [1] CRAN (R 4.5.2)
 xfun             0.57       2026-03-20 [1] CRAN (R 4.5.2)
 xml2             1.5.2      2026-01-17 [1] CRAN (R 4.5.2)
 yaml             2.3.12     2025-12-10 [1] CRAN (R 4.5.2)

 [1] /home/m/R/x86_64-pc-linux-gnu-library/4.5
 [2] /usr/local/lib/R/site-library
 [3] /usr/lib/R/site-library
 [4] /usr/lib/R/library
 * ── Packages attached to the search path.

──────────────────────────────────────────────────────────────────────────────