Code
# options to customize chunk outputs
knitr::opts_chunk$set(
message = FALSE
)Bellbird mimicry
# options to customize chunk outputs
knitr::opts_chunk$set(
message = FALSE
)flowchart LR A[Read birdNET output] --> B(Format data) B --> C(Graphs) C --> D(Statistical analysis) D --> E(Model summary) style A fill:#382A5433 style B fill:#395D9C33 style C fill:#3497A933 style D fill:#60CEAC33
# knitr is require for creating html/pdf/word reports
# formatR is used for soft-wrapping code
# install knitr package if not installed
if (!requireNamespace("sketchy", quietly = TRUE)) {
install.packages("sketchy")
}
packages <- c(
"knitr",
"Rtsne",
"caret",
"ggplot2",
"viridis",
"doParallel",
"brms",
"brmsish",
"cowplot",
"Rraven",
"kableExtra"
)
# install/ load packages
sketchy::load_packages(packages = packages)
options(knitr.kable.NA = '')
print <- function(x, row.names = FALSE) {
kb <- kable(x, row.names = row.names, digits = 4, "html")
kb <- kable_styling(kb,
bootstrap_options = c("striped", "hover", "condensed", "responsive"))
scroll_box(kb, width = "100%")
}
common_names <- c(
"Procnias tricarunculatus" = "Three-wattled Bellbird",
"Psarocolius guatimozinus" = "Chestnut-headed Oropendola",
"Psarocolius montezuma" = "Montezuma Oropendola",
"Pteroglossus erythropygius" = "Pale-mandibled Aracari",
"Pteroglossus torquatus" = "Collared Aracari",
"Ramphastos brevis" = "Choco Toucan",
"Ramphastos sulfuratus" = "Keel-billed Toucan",
"nocall" = "Unassigned"
)
# to create several posterior predictive check plots out of a brms fit
custom_ppc <- function(fit, group = NULL, ndraws = 30) {
by_group <- if (!is.null(group)) {
TRUE
} else
FALSE
if (by_group)
by_group <- if (any(names(fit$data) == group)) {
TRUE
} else
FALSE
if (by_group)
by_group <-
if (is.character(fit$data[, group]) |
is.factor(fit$data[, group])) {
TRUE
} else
FALSE
if (by_group) {
ppc_dens <- pp_check(fit,
ndraws = ndraws,
type = 'dens_overlay_grouped',
group = group)
pp_mean <- pp_check(
fit,
type = "stat_grouped",
stat = "mean",
group = group,
ndraws = ndraws
) + theme_classic()
} else {
ppc_dens <- pp_check(fit, ndraws = ndraws, type = 'dens_overlay')
pp_mean <- pp_check(fit,
type = "stat",
stat = "mean",
ndraws = ndraws) + theme_classic()
}
pp_plot_list <-
list(ppc_dens, pp_mean)
pp_plot_list <-
lapply(pp_plot_list, function(x)
x + scale_fill_viridis_d(
begin = 0.3,
end = 0.8,
alpha = 0.5,
option = "mako"
) + theme_classic())
ppc_plot <- plot_grid(plotlist = pp_plot_list, ncol = 2)
ppc_plot
}mod_eval_symp_allo <- read.csv("./data/processed/birdnet_models/sympatric_and_allopatric/sympatric_and_allopatric_evaluation.csv")
names(mod_eval_symp_allo) <- gsub("..", " ", names(mod_eval_symp_allo), fixed = TRUE)
print(mod_eval_symp_allo)| Class | Precision 0.5. | Recall 0.5. | F1.Score 0.5. | Precision opt. | Recall opt. | F1.Score opt. | AUPRC | AUROC | Optimal.Threshold | True.Positives | False.Positives | True.Negatives | False.Negatives | Samples | Percentage |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| OVERALL (Macro-avg) | 0.8908 | 0.9627 | 0.9232 | 0.9180 | 0.9504 | 0.9312 | 0.9630 | 0.9968 | |||||||
| Procnias tricarunculatus | 0.9712 | 0.9854 | 0.9783 | 1.0000 | 0.9708 | 0.9852 | 0.9990 | 0.9998 | 0.80 | 133 | 0 | 884 | 4 | 137 | 13.42 |
| Psarocolius guatimozinus | 0.8909 | 0.9608 | 0.9245 | 0.9423 | 0.9608 | 0.9515 | 0.9867 | 0.9992 | 0.55 | 49 | 3 | 967 | 2 | 51 | 5.00 |
| Psarocolius montezuma | 0.9014 | 0.9846 | 0.9412 | 0.9538 | 0.9538 | 0.9538 | 0.9873 | 0.9991 | 0.75 | 62 | 3 | 953 | 3 | 65 | 6.37 |
| Pteroglossus erythropygius | 0.8943 | 0.9486 | 0.9206 | 0.9336 | 0.9206 | 0.9271 | 0.9844 | 0.9959 | 0.70 | 197 | 14 | 793 | 17 | 214 | 20.96 |
| Pteroglossus torquatus | 0.6216 | 0.8679 | 0.7244 | 0.6216 | 0.8679 | 0.7244 | 0.7844 | 0.9842 | 0.50 | 46 | 28 | 940 | 7 | 53 | 5.19 |
| Ramphastos brevis | 0.9840 | 0.9946 | 0.9892 | 0.9840 | 0.9946 | 0.9892 | 0.9995 | 0.9999 | 0.50 | 184 | 3 | 833 | 1 | 185 | 18.12 |
| Ramphastos sulfuratus | 0.9722 | 0.9968 | 0.9844 | 0.9904 | 0.9842 | 0.9873 | 0.9994 | 0.9997 | 0.80 | 311 | 3 | 702 | 5 | 316 | 30.95 |
Keeping low confidence classifications (confidence < optimal threshold)
# read data
sympatric_allopatric_class <- imp_raven(
path = "./data/processed/birdnet_classification_outputs/sympatrics_and_allopactrics/sympatrics_and_allopatrics_model/",
all.data = TRUE,
warbler.format = TRUE,
pb = FALSE
)
# keep only test data
test_files <- list.files("/home/m/Dropbox/Projects/bellbird_mimicry/data/processed/birdnet_data/test_data/", full.names = FALSE, recursive = TRUE)
test_files <- basename(gsub("\\\\", "/", test_files))
sympatric_allopatric_class$sound.files <- basename(gsub("\\\\", "/", sympatric_allopatric_class$sound.files))
# all(test_files %in% sympatric_allopatric_class$sound.files)
#
# setdiff(test_files, sympatric_allopatric_class$sound.files)
sympatric_allopatric_test <- sympatric_allopatric_class[sympatric_allopatric_class$sound.files %in% test_files, ]
# filter low confidence classifications
sympatric_allopatric_test$true.positive <- sapply(seq_len(nrow(sympatric_allopatric_test)), function(x){
sympatric_allopatric_test$Confidence[x] >= mod_eval_symp_allo$Optimal.Threshold[mod_eval_symp_allo$Class == sympatric_allopatric_test$`Common Name`[x]]
})
sympatric_allopatric_test$`Common Name`[!sympatric_allopatric_test$true.positive] <- "nocall"
# rename column
sympatric_allopatric_test$predicted_sp <- sympatric_allopatric_test$`Common Name`
sympatric_allopatric_test$`Common Name` <- NULL
# read metadata
est <- readRDS(
"./data/processed/extended_selection_table_sympatric_and_allopatric_species.RDS"
)
# add metadata
sympatric_allopatric_test$sound.files <- basename(gsub("\\\\", "/", sympatric_allopatric_test$sound.files))
sympatric_allopatric_test$vocalizing_sp <- sapply(sympatric_allopatric_test$sound.files, function(x) {
est$source[paste0(est$sound.files, ".wav") == x][1]
})
# save confusion matrix
conf_mat <- confusionMatrix(
data = as.factor(sympatric_allopatric_test$predicted_sp),
reference = factor(sympatric_allopatric_test$vocalizing_sp, levels = levels(as.factor(sympatric_allopatric_test$predicted_sp))
)
)
conf_df_fp <- as.data.frame(conf_mat$table)
conf_df_fp$total <- sapply(conf_df_fp$Reference, function(x) {
sum(sympatric_allopatric_test$vocalizing_sp == x)
})
conf_df_fp$Proportion <- conf_df_fp$Freq / conf_df_fp$total
conf_df_fp$pred_common_name <- common_names[as.character(conf_df_fp$Prediction)]
conf_df_fp$ref_common_name <- common_names[as.character(conf_df_fp$Reference)]
# sort levels by genus
conf_df_fp$pred_common_name <- factor(
conf_df_fp$pred_common_name,
levels = c(
"Three-wattled Bellbird",
"Chestnut-headed Oropendola",
"Montezuma Oropendola",
"Collared Aracari",
"Pale-mandibled Aracari",
"Keel-billed Toucan",
"Choco Toucan",
"Unassigned"
)
)
conf_df_fp$ref_common_name <- factor(
conf_df_fp$ref_common_name,
levels = c(
"Three-wattled Bellbird",
"Chestnut-headed Oropendola",
"Montezuma Oropendola",
"Collared Aracari",
"Pale-mandibled Aracari",
"Keel-billed Toucan",
"Choco Toucan",
"Unassigned"
)
)
ggplot(conf_df_fp[conf_df_fp$ref_common_name != "Unassigned", ],
aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
geom_tile() +
theme_bw() +
coord_equal() +
scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
labs(x = "Reference species", y = "Predicted species") +
geom_text(aes(label = round(Proportion, 2)), color = "black") +
theme_classic() +
theme(axis.text.x = element_text(
color = "black",
size = 11,
angle = 30,
hjust = 1
))Excluding low confidence classifications (confidence >= optimal threshold)
sympatric_allopatric_test_tp <- sympatric_allopatric_test[sympatric_allopatric_test$true.positive, ]
# save confusion matrix
conf_mat <- confusionMatrix(
data = as.factor(sympatric_allopatric_test_tp$predicted_sp),
reference = factor(sympatric_allopatric_test_tp$vocalizing_sp, levels = levels(as.factor(sympatric_allopatric_test_tp$predicted_sp))
)
)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) {
sum(sympatric_allopatric_test_tp$vocalizing_sp == x)
})
conf_df$Proportion <- conf_df$Freq / conf_df$total
conf_df$pred_common_name <- common_names[as.character(conf_df$Prediction)]
conf_df$ref_common_name <- common_names[as.character(conf_df$Reference)]
# sort levels by genus
conf_df$pred_common_name <- factor(
conf_df$pred_common_name,
levels = c(
"Three-wattled Bellbird",
"Chestnut-headed Oropendola",
"Montezuma Oropendola",
"Collared Aracari",
"Pale-mandibled Aracari",
"Keel-billed Toucan",
"Choco Toucan",
"Unassigned"
)
)
conf_df$ref_common_name <- factor(
conf_df$ref_common_name,
levels = c(
"Three-wattled Bellbird",
"Chestnut-headed Oropendola",
"Montezuma Oropendola",
"Collared Aracari",
"Pale-mandibled Aracari",
"Keel-billed Toucan",
"Choco Toucan",
"Unassigned"
)
)
ggplot(conf_df, aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
geom_tile() +
theme_bw() +
coord_equal() +
scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
labs(x = "Reference species", y = "Predicted species") +
geom_text(aes(label = round(Proportion, 2)), color = "black") +
theme_classic() +
theme(
axis.text.x = element_text(
color = "black",
size = 11,
angle = 30,
hjust = 1
)
)embs <- read.csv("./data/processed/embeddings/embeddings_sympatric_allopatric_spp.csv")
# Split into separate columns
emb_matrix <- do.call(rbind, strsplit(embs$embedding, ","))
colnames(emb_matrix) <- paste0("emb", 1:ncol(emb_matrix))
emb_matrix <- apply(emb_matrix, 2, as.numeric)
# Combine with the first 3 columns
embs <- as.data.frame(cbind(embs[, 1:3], emb_matrix))
# add metadata
embs$file_path <- gsub("Nueva-grabaci\xf3n", "Nueva-grabación", embs$file_path, useBytes = TRUE)
embs$sound.files <- basename(gsub("\\\\", "/", embs$file_path, useBytes = TRUE))
embs$vocalizing_sp <- sapply(embs$sound.files, function(x) {
est$source[paste0(est$sound.files, ".wav") == x][1]
})
set.seed(123)
tsne_out <- Rtsne(as.matrix(embs[, grep("emb", names(embs))]))
tsne_all <- data.frame(vocalizing_sp = embs$vocalizing_sp, tsne1 = tsne_out$Y[,1], tsne2 = tsne_out$Y[,2])
saveRDS(tsne_all, "./data/processed/tsne_dims_sympatric_and_allopatric_species.rds")# colors and shapes by species for scatterplots
cols <- c(adjustcolor("tomato", alpha.f = 0.4), mako(4, alpha = 0.4, begin = 0.2, end = 0.8))
cols2 <- c(adjustcolor("tomato", alpha.f = 0.9), mako(4, alpha = 0.9, begin = 0.2, end = 0.8))
cols <- c(cols[1:2], rep(cols[3:5], each = 2))
cols2 <- c(cols2[1:2], rep(cols2[3:5], each = 2))
shapes <- c(16, 16, 16, 1, 17, 2, 15, 0)
names(shapes) <- names(cols) <- names(cols2) <- c(
"mimic_bellbird",
"Three-wattled Bellbird",
"Montezuma Oropendola",
"Chestnut-headed Oropendola",
"Collared Aracari",
"Pale-mandibled Aracari",
"Keel-billed Toucan",
"Choco Toucan"
)
tsne_all <- readRDS("./data/processed/tsne_dims_sympatric_and_allopatric_species.rds")
tsne_only_model_species <- tsne_all[tsne_all$vocalizing_sp != "mimic_bellbird", ]
tsne_only_model_species$common_name <- common_names[as.character(tsne_only_model_species$vocalizing_sp)]
tsne_only_model_species$common_name <- factor(
tsne_only_model_species$common_name,
levels = c(
"Three-wattled Bellbird",
"Chestnut-headed Oropendola",
"Montezuma Oropendola",
"Collared Aracari",
"Pale-mandibled Aracari",
"Keel-billed Toucan",
"Choco Toucan"
)
)
ggplot(tsne_only_model_species, aes(x = tsne1, y = tsne2, color = common_name, shape = common_name)) +
geom_point(size = 2) +
theme_minimal() +
labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
scale_color_manual(values = cols[names(cols) %in% unique(tsne_only_model_species$common_name)]) +
scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_only_model_species$common_name)]) +
theme_classic()# read data
mimic_class <- imp_raven(
path = "./data/processed/birdnet_classification_outputs/mimic_bellbird/sympatrics_and_allopatrics_model/",
all.data = TRUE,
warbler.format = TRUE,
pb = FALSE
)
# filter low confidence classifications
mimic_class$true.positive <- sapply(seq_len(nrow(mimic_class)), function(x){
mimic_class$Confidence[x] >= mod_eval_symp_allo$Optimal.Threshold[mod_eval_symp_allo$Class == mimic_class$`Common Name`[x]]
})
mimic_class$`Common Name`[!mimic_class$true.positive] <- "nocall"
# add metadata
mimic_class$sound.files <- basename(gsub(
"\\\\",
"/",
mimic_class$sound.files
))
# rename column
mimic_class$predicted_sp <- mimic_class$`Common Name`
mimic_class$vocalizing_sp <- "Procnias tricarunculatus"
mimic_class$`Common Name` <- NULL
prop_mimic_class <- table(mimic_class$predicted_sp)
prop_mimic_class <- data.frame(prop_mimic_class)
names(prop_mimic_class) <- c("Prediction", "Freq")
prop_mimic_class$total <- sum(prop_mimic_class$Freq)
prop_mimic_class$Proportion <- prop_mimic_class$Freq / prop_mimic_class$total
prop_mimic_class$Reference <- "Focal\nbellbird"
others <- conf_df_fp[conf_df_fp$Reference == "Procnias tricarunculatus", ]
others$Reference <- "Other\nbellbirds"
prop_mimic_psarocolius <- prop_mimic_class[prop_mimic_class$Prediction == "Psarocolius montezuma",]
prop_mimic_psarocolius$Freq <- 0
prop_mimic_psarocolius$Proportion <- 0
prop_mimic_psarocolius$Prediction <- "Psarocolius guatimozinus"
# proportions observed in adult bellbird
prop_class <- rbind(
prop_mimic_class,
prop_mimic_psarocolius,
others[, c("Prediction", "Freq", "total", "Proportion", "Reference")]
)
prop_class$genus <- sapply(
strsplit(as.character(prop_class$Prediction), " "),
"[[",
1
)
prop_class$type <- ifelse(
grepl("guati|eryth|brevis", prop_class$Prediction),
"Allopatric",
"Sympatric"
)
prop_class$type[prop_class$genus == "Procnias"] <- "Own\nspecies"
prop_class$type <- factor(
prop_class$type,
levels = c("Allopatric", "Sympatric", "Own\nspecies")
)
prop_class$genus <- factor(
prop_class$genus,
levels = c("Procnias", "Pteroglossus", "Psarocolius", "Ramphastos")
)
prop_class$Reference <- factor(
prop_class$Reference,
levels = c("Other\nbellbirds", "Focal\nbellbird")
)
genus_labels <- c(
"Procnias" = "Three-wattled bellbird",
"Psarocolius" = "Oropendolas",
"Pteroglossus" = "Aracaris",
"Ramphastos" = "Toucans"
)
prop_class$common_name <- common_names[as.character(prop_class$Prediction)]
prop_class$common_name <- gsub("-", "\n", prop_class$common_name)
prop_class$sp_label <- sapply(
strsplit(as.character(prop_class$common_name), " "),
"[[",
1
)
prop_class$genus[prop_class$sp_label == "Unassigned"] <- "Procnias"
prop_class$sp_label[prop_class$sp_label == "Three\nwattled" & prop_class$sp_label == "Unassigned"] <- ""
prop_class$sp_label_y <- ave(
prop_class$Proportion,
prop_class$genus,
FUN = function(x) {
x + log10(max(x) + 0.999) * 0.44
}
)# settings
base_size <- 14
percent_text_size <- 4
species_text_size <- 4
# GENERA TO PLOT
loop_df <- data.frame(
gen = c(
"Procnias",
"Pteroglossus",
"Psarocolius",
"Ramphastos"
),
label = c("A", "B", "C", "D"),
y_max = c(
1.3,
0.057,
0.004,
0.16
)
)
prop_class <- prop_class[order(prop_class$genus, prop_class$Reference),]
prop_class$sp_label_y[prop_class$genus == "Procnias"] <- c(
0.4, 1.03, 0.9, 0.4
) - 0.05
prop_class$sp_label_y[prop_class$genus == "Pteroglossus"] <- c(
0.01, 0.01, 0.019, 0.047
) - 0.003
prop_class$sp_label_y[prop_class$genus == "Psarocolius"] <- c(
0.0004, 0.0014, 0.00255, 0.0004
) + 0.0001
prop_class$sp_label_y[prop_class$genus == "Ramphastos"] <- c(
0.026, 0.02, 0.02, 0.13
) + 0.006
bar_cols <- c("gray", mako(10, alpha = 0.6, begin = 0.2, end = 0.8)[c(2, 8)])
prop_class$prop_label <- paste(
round(prop_class$Proportion * 100, 1),
"%\n n = ",
prop_class$Freq
)
# PANEL FUNCTION
make_panel <- function(gen, label, y_max){
# subset data
dat_sub <- subset(prop_class, genus == gen)
if(gen == "Procnias"){
dat_sub$type <- factor(
dat_sub$type,
levels = c(
"Own\nspecies",
"Allopatric",
"Sympatric"
)
)
}
# exactly 3 tick marks
y_breaks <- c(
0,
y_max / 2,
y_max
)
if(gen == "Procnias"){
fill_scale <- scale_fill_manual(
name = "",
values = c(
"Own\nspecies" = "#28192FB3",
"Allopatric" = "#28192FB3", # not used
"Sympatric" = "#DEF5E5B3" # bellbird color
),
breaks = "Sympatric"
)
} else {
fill_scale <- scale_fill_manual(
name = "",
values = c(
"Own\nspecies" = bar_cols[1],
"Allopatric" = bar_cols[2],
"Sympatric" = bar_cols[3]
),
breaks = c(
"Allopatric",
"Sympatric"
)
)
}
# y-axis formatting
if(gen == "Psarocolius"){
y_scale <- scale_y_continuous(
limits = c(0, y_max),
breaks = y_breaks,
labels = function(x) sprintf("%.1f%%", x * 100)
)
} else {
y_scale <- scale_y_continuous(
limits = c(0, y_max),
breaks = y_breaks,
labels = scales::percent
)
}
ggplot(dat_sub, aes(x = Reference, y = Proportion)) +
geom_bar(
aes(
group = interaction(Reference, type),
fill = type
),
position = position_dodge(),
stat = "identity"
) +
# percentages
geom_text(
lineheight = 0.7,
aes(
group = interaction(Reference, type),
label = dat_sub$prop_label,
),
position = position_dodge(width = 0.9),
vjust = -0.1,
size = percent_text_size
) +
# species labels
geom_text(
aes(
group = interaction(Reference, type),
label = sp_label,
y = sp_label_y
),
position = position_dodge(width = 0.9),
angle = 90,
hjust = 0,
vjust = 0.5,
size = species_text_size
) +
facet_wrap(
~ genus,
labeller = labeller(genus = genus_labels)
) +
y_scale +
fill_scale +
labs(
title = label,
x = NULL
) +
theme_classic(base_size = base_size) +
theme(
# legend only in Pteroglossus panel
legend.position = if(gen == "Pteroglossus"){
c(0.3, 0.8)
} else {
"none"
},
legend.background = element_rect(
fill = "white",
color = NA
),
axis.title.y = element_blank(),
panel.grid.major.x = element_blank(),
strip.background = element_rect(
fill = "white",
color = "black"
),
strip.text = element_text(
size = base_size
),
axis.text.x = element_text(
size = base_size
),
axis.text.y = element_text(
size = base_size
),
legend.text = element_text(
size = base_size
),
plot.title = element_text(
face = "bold",
hjust = 0
),
axis.line = element_line(
linewidth = 0.8
),
axis.ticks = element_line(
linewidth = 0.8
)
)
}
# CREATE PANELS
plot_list <- lapply(
seq_len(nrow(loop_df)),
function(i){
make_panel(
gen = loop_df$gen[i],
label = loop_df$label[i],
y_max = loop_df$y_max[i]
)
}
)
# COMBINE PANELS
combined_plot <- plot_grid(
plotlist = plot_list,
ncol = 2,
align = "hv"
)
# ADD SHARED Y-AXIS TITLE
# SHARED Y-AXIS LABEL
y_label <- ggdraw() +
draw_label(
"Percentage of vocalizations predicted as",
angle = 90,
size = base_size + 2
)
# FINAL PLOT
final_plot <- plot_grid(
y_label,
combined_plot,
ncol = 2,
rel_widths = c(0.06, 1)
)
if (!isTRUE(getOption('knitr.in.progress'))) {
ggsave(
filename = "./scripts/focal_vs_other_bellbirds_prediction.png",
plot = final_plot,
width = 10,
height = 8,
dpi = 300,
device = grDevices::png
)
final_plot
}Evaluate whether the proportion of vocalizations predicted as sympatric (model) species vs allopatric species is significantly higher in the focal bellbird than in other bellbirds
mimic_other_bellbirds <- sympatric_allopatric_test[sympatric_allopatric_test$predicted_sp != "Procnias tricarunculatus", ]
mimic_focal <- mimic_class[mimic_class$predicted_sp != "Procnias tricarunculatus", ]
mimic_focal$vocalizing_type <- "focal"
mimic_other_bellbirds$vocalizing_type <- "others"
mimic_bellbirds <- rbind(
mimic_focal[, c("predicted_sp", "vocalizing_type")],
mimic_other_bellbirds[, c("predicted_sp", "vocalizing_type")]
)
mimic_bellbirds$vocalizing_type <- factor(
mimic_bellbirds$vocalizing_type,
levels = c("others", "focal")
)
mimic_bellbirds$mimic_type <-
ifelse(
grepl("guati|eryth|brevis", mimic_bellbirds$predicted_sp),
0,
1
)
mimic_bellbirds$genus <- sapply(
strsplit(as.character(mimic_bellbirds$predicted_sp), " "),
"[[",
1
)
cores <- 4
chains <- 4
iter <- 4000
priors <- c(
prior(normal(0, 1.5), class = "Intercept"),
prior(normal(0, 0.7), class = "b"),
prior(exponential(1), class = "sd")
)
only_prior_fit <- brm(
mimic_type ~ vocalizing_type + (1 | genus),
data = mimic_bellbirds[mimic_bellbirds$predicted_sp != "nocall",],
family = bernoulli(),
prior = priors,
sample_prior = "only",
chains = chains,
iter = iter,
cores = cores,
file = "./data/processed/mimicry_prob_by_simpatry_model_only_priors.RDS",
file_refit = "on_change"
)
mod1 <- brm(
mimic_type ~ vocalizing_type + (1 | genus),
data = mimic_bellbirds[mimic_bellbirds$predicted_sp != "nocall",],
family = bernoulli(),
cores = cores,
prior = priors,
chains = chains,
iter = iter,
file = "./data/processed/mimicry_prob_by_simpatry_model.RDS",
file_refit = "on_change"
)Priors
prior_summary(mod1)| prior | class | coef | group | resp | dpar | nlpar | lb | ub | tag | source |
|---|---|---|---|---|---|---|---|---|---|---|
| normal(0, 0.7) | b | user | ||||||||
| b | vocalizing_typefocal | default | ||||||||
| normal(0, 1.5) | Intercept | user | ||||||||
| exponential(1) | sd | 0 | user | |||||||
| sd | genus | default | ||||||||
| sd | Intercept | genus | default |
Prior predictive checks
custom_ppc(only_prior_fit, group = "vocalizing_type", ndraws = 100)fill_color <- mako(10)[8]
extended_summary(
mod1,
n.posterior = 1000,
fill = fill_color,
trace.palette = mako,
remove.intercepts = FALSE,
highlight = TRUE,
print.name = FALSE
)| priors | formula | iterations | chains | thinning | warmup | diverg_transitions | rhats > 1.05 | min_bulk_ESS | min_tail_ESS | seed | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | b-normal(0, 0.7) Intercept-normal(0, 1.5) sd-exponential(1) | mimic_type ~ vocalizing_type + (1 | genus) | 4000 | 4 | 1 | 2000 | 2 (0.00025%) | 0 | 2382.72 | 3132.15 | 637637388 |
| Estimate | l-95% CI | u-95% CI | Rhat | Bulk_ESS | Tail_ESS | |
|---|---|---|---|---|---|---|
| b_Intercept | -0.713 | -2.137 | 0.502 | 1.001 | 2382.72 | 3132.15 |
| b_vocalizing_typefocal | 3.153 | 2.781 | 3.528 | 1 | 4738.91 | 4324.30 |
Posterior predictive check
custom_ppc(mod1, group = "vocalizing_type", ndraws = 100)post <- as_draws_df(mod1)
# Odds ratio: focal vs other birds
focal_vs_others <- exp(post$b_vocalizing_typefocal)
# Predicted probability of sympatric mimicry in focal birds
p_focal <- posterior_epred(
mod1,
newdata = data.frame(
vocalizing_type = "focal",
genus = unique(mimic_bellbirds$genus)[1]
),
re_formula = NA
)
p_focal <- as.numeric(p_focal)
# Predicted probability of sympatric mimicry in other birds
p_others <- posterior_epred(
mod1,
newdata = data.frame(
vocalizing_type = "others",
genus = unique(mimic_bellbirds$genus)[1]
),
re_formula = NA
)
p_others <- as.numeric(p_others)
# Within-group sympatric vs allopatric odds
focal_ratio <- p_focal / (1 - p_focal)
other_ratio <- p_others / (1 - p_others)Focal bellbird vocalizations had 23.43 times greater odds [95% UI: 16.14–34.04] of being classified as the sympatric rather than the allopatric model species compared with vocalizations from other bellbirds (predicted probability of sympatric classification for focal birds = 0.92; for other bellbirds = 0.34).
Within focal bellbird vocalizations, the odds of classification as the sympatric model species were 11.84 times greater [95% UI: 2.77–40.39] than the odds of classification as closely related allopatric species (predicted probability of sympatric classification for focal birds = 0.92; predicted probability of allopatric classification for focal birds = 0.08).
For vocalizations from other bellbirds, the odds of classification as sympatric and allopatric species were similar (odds = 0.51 [95% UI: 0.12–1.65]; predicted probability of sympatric classification for other bellbirds = 0.34; predicted probability of allopatric classification for other bellbirds = 0.66).
mod_eval_symp <- read.csv("./data/processed/birdnet_models/sympatric/sympatric_evaluation.csv")
names(mod_eval_symp) <- gsub("..", " ", names(mod_eval_symp), fixed = TRUE)
print(mod_eval_symp)| Class | Precision 0.5. | Recall 0.5. | F1.Score 0.5. | Precision opt. | Recall opt. | F1.Score opt. | AUPRC | AUROC | Optimal.Threshold | True.Positives | False.Positives | True.Negatives | False.Negatives | Samples | Percentage |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| OVERALL (Macro-avg) | 0.9884 | 0.9908 | 0.9896 | 0.9939 | 0.9890 | 0.9914 | 0.9994 | 0.9998 | |||||||
| Procnias tricarunculatus | 0.9783 | 0.9854 | 0.9818 | 1.0000 | 0.9781 | 0.9889 | 0.9994 | 0.9998 | 0.60 | 134 | 0 | 434 | 3 | 137 | 23.99 |
| Psarocolius montezuma | 0.9848 | 1.0000 | 0.9924 | 0.9848 | 1.0000 | 0.9924 | 0.9985 | 0.9998 | 0.15 | 65 | 1 | 505 | 0 | 65 | 11.38 |
| Pteroglossus torquatus | 1.0000 | 0.9811 | 0.9905 | 1.0000 | 0.9811 | 0.9905 | 0.9997 | 1.0000 | 0.15 | 52 | 0 | 518 | 1 | 53 | 9.28 |
| Ramphastos sulfuratus | 0.9906 | 0.9968 | 0.9937 | 0.9906 | 0.9968 | 0.9937 | 0.9998 | 0.9998 | 0.50 | 315 | 3 | 252 | 1 | 316 | 55.34 |
Keeping low confidence classifications (confidence < optimal threshold)
# read data
sympatric_class <- imp_raven(
path = "./data/processed/birdnet_classification_outputs/sympatrics/sympatrics_model/",
all.data = TRUE,
warbler.format = TRUE,
pb = FALSE
)
# keep only test data
test_files <- list.files("/home/m/Dropbox/Projects/bellbird_mimicry/data/processed/birdnet_data/test_data/", full.names = FALSE, recursive = TRUE)
test_files <- basename(gsub("\\\\", "/", test_files))
sympatric_class$sound.files <- basename(gsub("\\\\", "/", sympatric_class$sound.files))
# all(sympatric_class$sound.files %in% test_files)
# setdiff(test_files, sympatric_class$sound.files)
# setdiff(sympatric_class$sound.files, test_files)
sympatric_class_test <- sympatric_class[sympatric_class$sound.files %in% test_files, ]
sympatric_class_test$true.positive <- sapply(seq_len(nrow(sympatric_class_test)), function(x){
sympatric_class_test$Confidence[x] >= mod_eval_symp$Optimal.Threshold[mod_eval_symp$Class == sympatric_class_test$`Common Name`[x]]
})
sympatric_class_test$`Common Name`[!sympatric_class_test$true.positive] <- "nocall"
# rename column
sympatric_class_test$predicted_sp <- sympatric_class_test$`Common Name`
sympatric_class_test$`Common Name` <- NULL
# read metadata
est <- readRDS(
"./data/processed/extended_selection_table_sympatric_and_allopatric_species.RDS"
)
# add metadata
sympatric_class_test$sound.files <- basename(gsub("\\\\", "/", sympatric_class_test$sound.files))
sympatric_class_test$vocalizing_sp <- sapply(sympatric_class_test$sound.files, function(x) {
est$source[paste0(est$sound.files, ".wav") == x][1]
})
sympatric_class_test <- sympatric_class_test[sympatric_class_test$vocalizing_sp %in% unique(sympatric_class_test$predicted_sp),]
# save confusion matrix
conf_mat <- confusionMatrix(
data = as.factor(sympatric_class_test$predicted_sp),
reference = factor(sympatric_class_test$vocalizing_sp, levels = levels(as.factor(sympatric_class_test$predicted_sp)))
)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) {
sum(sympatric_class_test$vocalizing_sp == x)
})
conf_df$Proportion <- conf_df$Freq / conf_df$total
conf_df$pred_common_name <- common_names[as.character(conf_df$Prediction)]
conf_df$ref_common_name <- common_names[as.character(conf_df$Reference)]
# sort levels by genus
conf_df$pred_common_name <- factor(
conf_df$pred_common_name,
levels = c(
"Three-wattled Bellbird",
"Montezuma Oropendola",
"Collared Aracari",
"Keel-billed Toucan",
"Unassigned"
)
)
conf_df$ref_common_name <- factor(
conf_df$ref_common_name,
levels = c(
"Three-wattled Bellbird",
"Montezuma Oropendola",
"Collared Aracari",
"Keel-billed Toucan",
"Unassigned"
)
)
ggplot(conf_df[conf_df$ref_common_name != "Unassigned",], aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
geom_tile() +
theme_bw() +
coord_equal() +
scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
labs(x = "Reference species", y = "Predicted species") +
geom_text(aes(label = round(Proportion, 2)), color = "black") +
theme_classic() +
theme(
axis.text.x = element_text(
color = "black",
size = 11,
angle = 30,
hjust = 1
)
)Excluding low confidence classifications (confidence >= optimal threshold)
# save confusion matrix
conf_mat <- confusionMatrix(
data = as.factor(sympatric_class_test$predicted_sp[sympatric_class_test$true.positive]),
reference = factor(sympatric_class_test$vocalizing_sp[sympatric_class_test$true.positive], levels = levels(as.factor(sympatric_class_test$predicted_sp[sympatric_class_test$true.positive])))
)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x) {
sum(sympatric_class_test$vocalizing_sp[sympatric_class_test$true.positive] == x)
})
conf_df$Proportion <- conf_df$Freq / conf_df$total
conf_df$pred_common_name <- common_names[as.character(conf_df$Prediction)]
conf_df$ref_common_name <- common_names[as.character(conf_df$Reference)]
# sort levels by genus
conf_df$pred_common_name <- factor(
conf_df$pred_common_name,
levels = c(
"Three-wattled Bellbird",
"Montezuma Oropendola",
"Collared Aracari",
"Keel-billed Toucan"
)
)
conf_df$ref_common_name <- factor(
conf_df$ref_common_name,
levels = c(
"Three-wattled Bellbird",
"Montezuma Oropendola",
"Collared Aracari",
"Keel-billed Toucan"
)
)
ggplot(conf_df, aes(x = ref_common_name, y = pred_common_name, fill = Proportion)) +
geom_tile() +
theme_bw() +
coord_equal() +
scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
labs(x = "Reference species", y = "Predicted species") +
geom_text(aes(label = round(Proportion, 2)), color = "black") +
theme_classic() +
theme(
axis.text.x = element_text(
color = "black",
size = 11,
angle = 30,
hjust = 1
)
)# read data
sympatric_focal <- imp_raven(
path = "./data/processed/birdnet_classification_outputs/mimic_bellbird/sympatric_model/",
all.data = TRUE,
warbler.format = TRUE,
pb = FALSE
)
# filter out low confidence calls
sympatric_focal$true.positive <- sapply(seq_len(nrow(sympatric_focal)), function(x){
if (sympatric_focal$`Common Name`[x] == "nocall")
return(FALSE)
sympatric_focal$Confidence[x] >= mod_eval_symp$Optimal.Threshold[mod_eval_symp$Class == sympatric_focal$`Common Name`[x]]
})
sympatric_focal$Reference <- "Focal\nbellbird"
sympatric_focal_unfiltered <- sympatric_focal
sympatric_focal$`Common Name`[!sympatric_focal$true.positive] <- "nocall"
# rename column
sympatric_focal$predicted_sp <- sympatric_focal$`Common Name`
sympatric_focal$`Common Name` <- NULL
# get other bellbirds from previous data
sympatric_others <- sympatric_class_test[sympatric_class_test$vocalizing_sp == "Procnias tricarunculatus", ]
sympatric_others$vocalizing_sp <- NULL
sympatric_others$Reference <- "Other\nbellbirds"
sympatric <- rbind(
sympatric_focal,
sympatric_others
)
prop_sympatric <- table(sympatric$predicted_sp, sympatric$Reference)
prop_sympatric <- data.frame(prop_sympatric)
names(prop_sympatric) <- c("Prediction", "Vocalizing", "Freq")
# add Psarocolius
# prop_sympatric2 <- prop_sympatric[3:4, ]
# prop_sympatric2$Prediction <- "Psarocolius montezuma"
# prop_sympatric2$Freq <- 0
# prop_sympatric <- rbind(prop_sympatric, prop_sympatric2)
prop_sympatric$total <- sapply(prop_sympatric$Vocalizing, function(x) sum(prop_sympatric$Freq[prop_sympatric$Vocalizing == x]))
prop_sympatric$Proportion <- prop_sympatric$Freq / prop_sympatric$total
prop_sympatric$common_name <- common_names[as.character(
prop_sympatric$Prediction
)]
# order levels
prop_sympatric$common_name <- factor(
prop_sympatric$common_name,
levels = c(
"Three-wattled Bellbird",
"Collared Aracari",
"Keel-billed Toucan",
"Montezuma Oropendola",
"Unassigned"
)
)# prepare data
prop_sympatric$panel <- as.character(prop_sympatric$common_name)
prop_sympatric$panel[
prop_sympatric$common_name %in%
c("Three-wattled Bellbird", "Unassigned")
] <- "Three-wattled Bellbird"
prop_sympatric$Vocalizing <- factor(
prop_sympatric$Vocalizing,
levels = c("Other\nbellbirds", "Focal\nbellbird")
)
# User settings
base_size <- 14
percent_text_size <- 4
loop_df <- data.frame(
panel = c(
"Three-wattled Bellbird",
"Collared Aracari",
"Montezuma Oropendola",
"Keel-billed Toucan"
),
label = c(
"A",
"B",
"C",
"D"
),
y_max = c(
1.4,
0.1,
0.022,
0.27
)
)
prop_sympatric$prop_label <- paste(
round(prop_sympatric$Proportion * 100, 1),
"%\n n = ",
prop_sympatric$Freq
)
prop_sympatric$sp_label <- ""
prop_sympatric$sp_label[prop_sympatric$panel == "Three-wattled Bellbird"] <- as.character(prop_sympatric$common_name[prop_sympatric$panel == "Three-wattled Bellbird"])
prop_sympatric$sp_label <- gsub("Three-wattled Bellbird", "Three\nwattled", prop_sympatric$sp_label)
prop_sympatric$sp_label <- as.character(prop_sympatric$sp_label)
prop_sympatric$sp_label_y <- prop_sympatric$Proportion + 0.18
# panel function
make_panel <- function(panel_name, label, y_max){
dat_sub <- subset(
prop_sympatric, subset =
prop_sympatric$panel == panel_name
)
y_breaks <- c(
0,
y_max / 2,
y_max
)
if(panel_name == "Montezuma Oropendola"){
y_scale <- scale_y_continuous(
limits = c(0, y_max),
breaks = y_breaks,
labels = function(x) sprintf("%.1f%%", x * 100)
)
} else {
y_scale <- scale_y_continuous(
limits = c(0, y_max),
breaks = y_breaks,
labels = scales::percent
)
}
if(panel_name == "Three-wattled Bellbird"){
fill_scale <- scale_fill_manual(
name = "",
values = c(
"Unassigned" = "#DEF5E5B3",
"Three-wattled Bellbird" = "#28192FB3" # bellbird color
),
breaks = "Sympatric"
)
} else {
fill_scale <- scale_fill_manual(
name = "",
values = c(bar_cols[3]),
)
}
ggplot(
dat_sub,
aes(
x = Vocalizing,
y = Proportion,
fill = common_name
)
) +
geom_text(
aes(
group = interaction(Vocalizing, common_name),
label = sp_label,
y = sp_label_y
),
position = position_dodge(width = 0.9),
angle = 90,
hjust = 0,
vjust = 0.5,
size = species_text_size
) +
geom_bar(
stat = "identity",
position = position_dodge(width = 0.9)
) +
geom_text(
lineheight = 0.7,
aes(
label = dat_sub$prop_label
),
position = position_dodge(width = 0.9),
vjust = -0.1,
size = percent_text_size
) +
facet_wrap(
~ panel,
scales = "free_x"
) +
y_scale +
fill_scale +
labs(
title = label,
x = NULL
) +
theme_classic(base_size = base_size) +
theme(
legend.position = "none",
legend.background = element_rect(
fill = "white",
color = NA
),
axis.title.y = element_blank(),
strip.background = element_rect(
fill = "white",
color = "black"
),
strip.text = element_text(
size = base_size
),
axis.text.y = element_text(
size = base_size
),
legend.text = element_text(
size = base_size
),
plot.title = element_text(
face = "bold",
hjust = 0
),
axis.line = element_line(
linewidth = 0.8
),
axis.ticks = element_line(
linewidth = 0.8
)
)
}
# Create panels
plot_list <- lapply(
seq_len(nrow(loop_df)),
function(i){
make_panel(
panel_name = loop_df$panel[i],
label = loop_df$label[i],
y_max = loop_df$y_max[i]
)
}
)
# Combine panels
combined_plot <- plot_grid(
plotlist = plot_list,
ncol = 2,
align = "hv"
)
y_label <- ggdraw() +
draw_label(
"Percentage of vocalizations predicted as",
angle = 90,
size = base_size
)
# Final plot
final_plot <- plot_grid(
y_label,
combined_plot,
ncol = 2,
rel_widths = c(0.06, 1)
)
if (!isTRUE(getOption('knitr.in.progress'))) {
ggsave(
filename = "./scripts/focal_vs_other_bellbirds_sympatric_prediction.png",
plot = final_plot,
width = 10,
height = 8,
dpi = 300,
device = grDevices::png
)
final_plot
} sympatric$prediction_type <- ifelse(
grepl("rocnias", sympatric$predicted_sp),
0,
1
)
sympatric$vocalizing_type <- ifelse(
grepl("ocal", sympatric$Reference),
"focal",
"others"
)
# order levels
sympatric$vocalizing_type <- factor(
sympatric$vocalizing_type,
levels = c("others", "focal")
)
cores <- 4
chains <- 4
iter <- 4000
priors <- c(
prior(normal(-3, 1), class = "Intercept"),
prior(normal(0, 2), class = "b")
)
only_prior_fit2 <- brm(
prediction_type ~ vocalizing_type + predicted_sp,
data = sympatric[sympatric$predicted_sp != "nocall",],
family = bernoulli(),
prior = priors,
sample_prior = "only",
chains = chains,
iter = iter,
cores = cores,
file = "./data/processed/mimicry_simpatric_only_model_only_priors.RDS",
file_refit = "on_change"
)
mod2 <- brm(
prediction_type ~ vocalizing_type,
data = sympatric[sympatric$predicted_sp != "nocall",],
family = bernoulli(),
prior = priors,
chains = chains,
iter = iter,
cores = cores,
control = list(
adapt_delta = 0.999,
max_treedepth = 15
),
file = "./data/processed/mimicry_simpatric_only_model.RDS",
file_refit = "on_change"
)Priors
prior_summary(mod2)| prior | class | coef | group | resp | dpar | nlpar | lb | ub | tag | source |
|---|---|---|---|---|---|---|---|---|---|---|
| normal(0, 2) | b | user | ||||||||
| b | vocalizing_typefocal | default | ||||||||
| normal(-3, 1) | Intercept | user |
Prior predictive checks
custom_ppc(only_prior_fit2, group = "vocalizing_type", ndraws = 100)fill_color <- mako(10)[8]
extended_summary(
mod2,
n.posterior = 1000,
fill = fill_color,
trace.palette = mako,
remove.intercepts = FALSE,
highlight = TRUE,
print.name = FALSE
)| priors | formula | iterations | chains | thinning | warmup | diverg_transitions | rhats > 1.05 | min_bulk_ESS | min_tail_ESS | seed | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | b-normal(0, 2) Intercept-normal(-3, 1) | prediction_type ~ vocalizing_type | 4000 | 4 | 1 | 2000 | 0 (0%) | 0 | 1528.37 | 1785.78 | 2052028757 |
| Estimate | l-95% CI | u-95% CI | Rhat | Bulk_ESS | Tail_ESS | |
|---|---|---|---|---|---|---|
| b_Intercept | -5.140 | -7.251 | -3.595 | 1.002 | 1528.37 | 1785.78 |
| b_vocalizing_typefocal | 5.417 | 3.869 | 7.528 | 1.001 | 1551.31 | 1853.54 |
Posterior predictive check
custom_ppc(mod1, group = "vocalizing_type", ndraws = 100)# Odds ratio: focal vs other bellbirds
post2 <- as_draws_df(mod2)
# Odds ratio: focal vs other bellbirds
focal_vs_others <- exp(post2$b_vocalizing_typefocal)
# Predicted probability of sympatric classification for focal birds
p_focal <- posterior_epred(
mod2,
newdata = data.frame(vocalizing_type = "focal")
)
p_focal <- as.numeric(p_focal)
# Predicted probability of sympatric classification for other bellbirds
p_others <- posterior_epred(
mod2,
newdata = data.frame(vocalizing_type = "others")
)
p_others <- as.numeric(p_others)mimic_data <- est[est$source == "mimic_bellbird", ]
rec_info <- read.table("./data/raw/Recording_dates_.csv", head = TRUE, sep = ";")
rec_info$date <- gsub("\\/", "-", rec_info$date)
rec_info$date <- gsub("out", "ago", rec_info$date)
rec_info$date <- gsub("set", "sep", rec_info$date)
rec_info$org.date <- rec_info$date
rec_info$date <- as.Date(rec_info$date, format = "%d-%b-%y")
mimic_data$orig.sound.files <- attr(mimic_data, "check.res")$orig.sound.files
mimic_data$date <- sapply(mimic_data$orig.sound.files, function(x) as.character(rec_info$date[rec_info$sound.files ==
x][1]))
mimic_data$date <- as.Date(mimic_data$date)
sympatric_focal_unfiltered$fixed.sound.files <- gsub("Nueva-grabaci\xf3n", "Nueva-grabación", sympatric_focal_unfiltered$sound.files, useBytes = TRUE)
sympatric_focal_unfiltered$fixed.sound.files <- basename(gsub("\\\\", "/", sympatric_focal_unfiltered$fixed.sound.files, useBytes = TRUE))
mimic_data$sound.files <- gsub("Nueva-grabaci\xf3n", "Nueva-grabación", mimic_data$sound.files, useBytes = TRUE)
mimic_data$sound.files <- paste0(mimic_data$sound.files, ".wav")
mimic_data$pred_mimic <- sapply(mimic_data$sound.files, function(x) {
sympatric_focal_unfiltered$`Common Name`[sympatric_focal_unfiltered$fixed.sound.files == x][1]
})
# mimic_data <- mimic_data[mimic_data$pred_mimic != "babbling",]
# mimic_data$pred_mimic <- ifelse(is.na(mimic_data$pred_mimic), "nocall", mimic_data$pred_mimic)
agg_pred <- aggregate(orig.sound.files ~ date + pred_mimic, mimic_data, length)
out <- lapply(unique(agg_pred$date), function(x) {
X <- agg_pred[agg_pred$date == x, ]
X$prop <- X$orig.sound.files/sum(X$orig.sound.files)
if (length(unique(X$pred_mimic)) < length(unique(mimic_data$pred_mimic))) {
new_df <- data.frame(date = X$date[1], pred_mimic = setdiff(c("Procnias tricarunculatus",
"Pteroglossus torquatus", "Ramphastos sulfuratus", "Psarocolius montezuma", "nocall"), X$pred_mimic),
orig.sound.files = 0, prop = 0)
X <- rbind(new_df, X)
}
X$total <- sum(X$orig.sound.files)
return(X)
})
prop_pred <- do.call(rbind, out)
prop_pred$date.fact <- as.factor(as.character(prop_pred$date))
count_date <- aggregate(orig.sound.files ~ date, prop_pred, sum)
prop_pred$pred_common_name <- common_names[as.character(prop_pred$pred_mimic)]
dat <- prop_pred[prop_pred$pred_mimic != "Psarocolius montezuma",]
dat$julian_day <- as.numeric(format(as.Date(dat$date), "%j"))
dat$pred_common_name <- common_names[as.character(dat$pred_mimic)]
dat$pred_common_name <- factor(dat$pred_common_name, levels = c( "Three-wattled Bellbird", "Collared Aracari", "Keel-billed Toucan", "Unassigned"))We modeled the probability of producing original sounds as a function of Julian day while allowing both intercepts and temporal slopes to vary among mimic categories (pred_mimic). Rather than fitting a full fixed interaction between date and mimic category, we used a hierarchical varying-slope structure to account for heterogeneity among mimic groups while enabling partial pooling across categories. This approach reduces overparameterization and yields more stable estimates, particularly for mimic groups with limited sample sizes, while still allowing different groups to exhibit distinct temporal trajectories.
# weakly informative priors
priors <- c(
# Weakly informative logistic intercept
prior(normal(0, 2.5), class = "Intercept"),
# Temporal slope
prior(normal(0, 1), class = "b"),
# Random effect standard deviations
prior(exponential(1), class = "sd")
)
mod2 <- brm(
orig.sound.files | trials(total) ~
scale(julian_day) +
(1 + scale(julian_day) | pred_common_name),
family = binomial,
data = dat,
prior = priors,
chains = 4,
cores = 4,
iter = 4000,
control = list(adapt_delta = 0.95), file_refit = "on_change",
file = "data/processed/brms_model_prop_pred_mimicry.rds"
)
# mod2 <- add_criterion(
# mod2,
# criterion = "loo")
only_prior_mod2 <- brm(
orig.sound.files | trials(total) ~
scale(julian_day) +
(1 + scale(julian_day) | pred_common_name),
family = binomial,
data = dat,
prior = priors,
sample_prior = "only",
chains = 4,
cores = 4,
iter = 4000,
control = list(adapt_delta = 0.95), file_refit = "on_change",
file = "data/processed/brms_model_prop_pred_mimicry_only_prior.rds"
)Priors
prior_summary(mod2)| prior | class | coef | group | resp | dpar | nlpar | lb | ub | tag | source |
|---|---|---|---|---|---|---|---|---|---|---|
| normal(0, 1) | b | user | ||||||||
| b | scalejulian_day | default | ||||||||
| normal(0, 2.5) | Intercept | user | ||||||||
| lkj_corr_cholesky(1) | L | default | ||||||||
| L | pred_common_name | default | ||||||||
| exponential(1) | sd | 0 | user | |||||||
| sd | pred_common_name | default | ||||||||
| sd | Intercept | pred_common_name | default | |||||||
| sd | scalejulian_day | pred_common_name | default |
Prior predictive checks
custom_ppc(only_prior_mod2, group = "vocalizing_type", ndraws = 100)fill_color <- mako(10)[8]
extended_summary(
mod2,
n.posterior = 1000,
fill = fill_color,
trace.palette = mako,
remove.intercepts = FALSE,
highlight = TRUE,
print.name = FALSE
)| priors | formula | iterations | chains | thinning | warmup | diverg_transitions | rhats > 1.05 | min_bulk_ESS | min_tail_ESS | seed | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | b-normal(0, 1) Intercept-normal(0, 2.5) L-lkj_corr_cholesky(1) sd-exponential(1) | orig.sound.files | trials(total) ~ scale(julian_day) + (1 + scale(julian_day) | pred_common_name) | 4000 | 4 | 1 | 2000 | 2 (0.00025%) | 0 | 2720.74 | 3719.75 | 263265829 |
| Estimate | l-95% CI | u-95% CI | Rhat | Bulk_ESS | Tail_ESS | |
|---|---|---|---|---|---|---|
| b_Intercept | -1.704 | -4.106 | 0.708 | 1 | 2720.74 | 3719.75 |
| b_scalejulian_day | -0.140 | -1.097 | 0.816 | 1 | 3674.43 | 4492.18 |
Posterior predictive check
custom_ppc(mod2, group = "vocalizing_type", ndraws = 100)# Extract posterior draws as base R data.frame
post <- as.data.frame(mod2)
# Get species levels
species_levels <- unique(mod2$data$pred_common_name)
# Global slope
beta_global <- post[, "b_scalejulian_day"]
# Find all random slope columns automatically
rand_cols <- grep("^r_pred_common_name\\[.*,scalejulian_day\\]$",
names(post),
value = TRUE)
# Extract species names from column names
species_levels <- sub("r_pred_common_name\\[(.*),scalejulian_day\\]", "\\1", rand_cols)
# Storage matrix
results <- matrix(NA, nrow = length(species_levels), ncol = 3)
colnames(results) <- c("mean_slope",
"l95",
"u95")
rownames(results) <- species_levels
# Loop through species
for (i in seq_along(rand_cols)) {
slope_draws <- beta_global + post[, rand_cols[i]]
results[i, "mean_slope"] <- mean(slope_draws)
results[i, "l95"] <- quantile(slope_draws, 0.025)
results[i, "u95"] <- quantile(slope_draws, 0.975)
}
# Print results
print(round(results, 4), row.names = TRUE)| mean_slope | l95 | u95 | |
|---|---|---|---|
| Three-wattled.Bellbird | 0.7808 | 0.6627 | 0.9048 |
| Collared.Aracari | -0.1110 | -0.2725 | 0.0511 |
| Keel-billed.Toucan | -0.7156 | -0.8308 | -0.6031 |
| Unassigned | -0.8273 | -3.2819 | 1.3958 |
# Create prediction grid
newdata <- expand.grid(
julian_day = seq(min(dat$julian_day),
max(dat$julian_day),
length.out = 100),
pred_common_name = unique(dat$pred_common_name)
)
# IMPORTANT: match scaling used in model
newdata$scalejulian_day <- scale(dat$julian_day)[1] # dummy to get attributes
newdata$scalejulian_day <- (newdata$julian_day - mean(dat$julian_day)) /
sd(dat$julian_day)
newdata$total <- round(mean(dat$total), 0) # use mean total for predictions
# Get posterior expected probabilities
epred <- posterior_epred(
mod2,
newdata = newdata,
re_formula = NULL
)
# Posterior mean prediction
newdata$fit <- colMeans(epred)
# Convert julian day back to date for plotting
origin_year <- format(as.Date(dat$date[1]), "%Y")
newdata$date <- as.Date(newdata$julian_day - 1,
origin = paste0(origin_year, "-01-01"))
newdata$fit_prop <- newdata$fit / 100
# include only those for which CI did not include 0
results_df <- as.data.frame(results)
sig_spp <- rownames(results_df)[results_df$l95 * results_df$u95 > 0]
newdata <- newdata[newdata$pred_common_name %in% c("Three-wattled Bellbird", "Collared Aracari", "Keel-billed Toucan", "Unassigned"), ]
# Plot
ggplot(
dat[dat$prop > 0, ],
aes(
x = as.Date(date),
y = prop,
fill = pred_common_name,
col = pred_common_name,
size = orig.sound.files
)
) +
geom_point(pch = 21) +
geom_line(
data = newdata,
aes(x = date, y = fit_prop, color = pred_common_name),
size = 1.2
) +
scale_fill_viridis_d(alpha = 0.6, end = 0.8) +
scale_color_viridis_d(alpha = 0.8, end = 0.8) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) +
labs(
x = "Date",
y = "Percentage of vocalizations\npredicted as",
fill = "Predicted species",
color = "Predicted species",
size = "Number of recorded\nvocalizations"
) +
scale_y_continuous(
labels = scales::percent)─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.5.2 (2025-10-31)
os Ubuntu 22.04.4 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Costa_Rica
date 2026-06-11
pandoc 3.6.3 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
quarto 1.8.25 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/quarto
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
abind 1.4-8 2024-09-12 [1] CRAN (R 4.5.2)
ape 5.8-1 2024-12-16 [1] CRAN (R 4.5.2)
arrayhelpers 1.1-0 2020-02-04 [1] CRAN (R 4.5.2)
backports 1.5.1 2026-04-03 [1] CRAN (R 4.5.2)
bayesplot 1.15.0 2025-12-12 [1] CRAN (R 4.5.2)
bitops 1.0-9 2024-10-03 [1] CRAN (R 4.5.2)
bridgesampling 1.2-1 2025-11-19 [1] CRAN (R 4.5.2)
brio 1.1.5 2024-04-24 [1] CRAN (R 4.5.2)
brms * 2.23.0 2025-09-09 [1] CRAN (R 4.5.2)
brmsish * 1.0.0 2026-03-03 [1] Github (maRce10/brmsish@81ab826)
Brobdingnag 1.2-9 2022-10-19 [1] CRAN (R 4.5.2)
cachem 1.1.0 2024-05-16 [1] CRAN (R 4.5.2)
caret * 7.0-1 2024-12-10 [1] CRAN (R 4.5.2)
checkmate 2.3.4 2026-02-03 [1] CRAN (R 4.5.2)
class 7.3-23 2025-01-01 [1] CRAN (R 4.5.2)
cli 3.6.6 2026-04-09 [1] CRAN (R 4.5.2)
coda 0.19-4.1 2024-01-31 [1] CRAN (R 4.5.2)
codetools 0.2-20 2024-03-31 [1] CRAN (R 4.5.2)
cowplot * 1.2.0 2025-07-07 [1] CRAN (R 4.5.2)
crayon 1.5.3 2024-06-20 [1] CRAN (R 4.5.2)
curl 7.0.0 2025-08-19 [1] CRAN (R 4.5.2)
data.table 1.18.2.1 2026-01-27 [1] CRAN (R 4.5.2)
devtools 2.4.6 2025-10-03 [1] CRAN (R 4.5.2)
digest 0.6.39 2025-11-19 [1] CRAN (R 4.5.2)
distributional 0.6.0 2026-01-14 [1] CRAN (R 4.5.2)
doParallel * 1.0.17 2022-02-07 [1] CRAN (R 4.5.2)
dplyr 1.2.1 2026-04-03 [1] CRAN (R 4.5.2)
dtw 1.23-1 2022-09-19 [1] CRAN (R 4.5.2)
e1071 1.7-17 2025-12-18 [1] CRAN (R 4.5.2)
ellipsis 0.3.2 2021-04-29 [3] CRAN (R 4.1.1)
evaluate 1.0.5 2025-08-27 [1] CRAN (R 4.5.2)
farver 2.1.2 2024-05-13 [1] CRAN (R 4.5.2)
fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.5.2)
fftw 1.0-9 2024-09-20 [1] CRAN (R 4.5.2)
foreach * 1.5.2 2022-02-02 [3] CRAN (R 4.1.2)
fs 2.0.1 2026-03-24 [1] CRAN (R 4.5.2)
future 1.70.0 2026-03-14 [1] CRAN (R 4.5.2)
future.apply 1.20.2 2026-02-20 [1] CRAN (R 4.5.2)
generics 0.1.4 2025-05-09 [1] CRAN (R 4.5.2)
ggdist 3.3.3 2025-04-23 [1] CRAN (R 4.5.2)
ggplot2 * 4.0.2 2026-02-03 [1] CRAN (R 4.5.2)
globals 0.19.1 2026-03-13 [1] CRAN (R 4.5.2)
glue 1.8.0 2024-09-30 [1] CRAN (R 4.5.2)
gower 1.0.2 2024-12-17 [1] CRAN (R 4.5.2)
gridExtra 2.3 2017-09-09 [3] CRAN (R 4.0.1)
gtable 0.3.6 2024-10-25 [1] CRAN (R 4.5.2)
hardhat 1.4.2 2025-08-20 [1] CRAN (R 4.5.2)
htmltools 0.5.9 2025-12-04 [1] CRAN (R 4.5.2)
htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.5.2)
httr 1.4.8 2026-02-13 [1] CRAN (R 4.5.2)
inline 0.3.21 2025-01-09 [1] CRAN (R 4.5.2)
ipred 0.9-15 2024-07-18 [1] CRAN (R 4.5.2)
iterators * 1.0.14 2022-02-05 [3] CRAN (R 4.1.2)
jsonlite 2.0.0 2025-03-27 [1] CRAN (R 4.5.2)
kableExtra * 1.4.0 2024-01-24 [1] CRAN (R 4.5.2)
knitr * 1.51 2025-12-20 [1] CRAN (R 4.5.2)
labeling 0.4.3 2023-08-29 [1] CRAN (R 4.5.2)
lattice * 0.22-9 2026-02-09 [1] CRAN (R 4.5.2)
lava 1.9.0 2026-04-05 [1] CRAN (R 4.5.2)
lifecycle 1.0.5 2026-01-08 [1] CRAN (R 4.5.2)
listenv 0.10.1 2026-03-10 [1] CRAN (R 4.5.2)
loo 2.9.0 2025-12-23 [1] CRAN (R 4.5.2)
lubridate 1.9.5 2026-02-04 [1] CRAN (R 4.5.2)
magrittr 2.0.5 2026-04-04 [1] CRAN (R 4.5.2)
MASS 7.3-65 2025-02-28 [1] CRAN (R 4.5.2)
Matrix 1.7-4 2025-08-28 [1] CRAN (R 4.5.2)
matrixStats 1.5.0 2025-01-07 [1] CRAN (R 4.5.2)
memoise 2.0.1 2021-11-26 [3] CRAN (R 4.1.2)
ModelMetrics 1.2.2.2 2020-03-17 [3] CRAN (R 4.0.1)
mvtnorm 1.3-3 2025-01-10 [1] CRAN (R 4.5.2)
NatureSounds 1.0.5 2025-01-17 [1] CRAN (R 4.5.2)
nlme 3.1-168 2025-03-31 [1] CRAN (R 4.5.2)
nnet 7.3-20 2025-01-01 [1] CRAN (R 4.5.2)
otel 0.2.0 2025-08-29 [1] CRAN (R 4.5.2)
packrat 0.9.3 2025-06-16 [1] CRAN (R 4.5.2)
parallelly 1.46.1 2026-01-08 [1] CRAN (R 4.5.2)
pbapply 1.7-4 2025-07-20 [1] CRAN (R 4.5.2)
pillar 1.11.1 2025-09-17 [1] CRAN (R 4.5.2)
pkgbuild 1.4.8 2025-05-26 [1] CRAN (R 4.5.2)
pkgconfig 2.0.3 2019-09-22 [3] CRAN (R 4.0.1)
pkgload 1.5.1 2026-04-01 [1] CRAN (R 4.5.2)
plyr 1.8.9 2023-10-02 [1] CRAN (R 4.5.2)
posterior 1.6.1 2025-02-27 [1] CRAN (R 4.5.2)
pROC 1.19.0.1 2025-07-31 [1] CRAN (R 4.5.2)
prodlim 2026.03.11 2026-03-11 [1] CRAN (R 4.5.2)
proxy 0.4-29 2025-12-29 [1] CRAN (R 4.5.2)
purrr 1.2.2 2026-04-10 [1] CRAN (R 4.5.2)
QuickJSR 1.9.0 2026-01-25 [1] CRAN (R 4.5.2)
R6 2.6.1 2025-02-15 [1] CRAN (R 4.5.2)
RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.5.2)
Rcpp * 1.1.1 2026-01-10 [1] CRAN (R 4.5.2)
RcppParallel 5.1.11-2 2026-03-05 [1] CRAN (R 4.5.2)
RCurl 1.98-1.17 2025-03-22 [1] CRAN (R 4.5.2)
recipes 1.3.1 2025-05-21 [1] CRAN (R 4.5.2)
remotes 2.5.0 2024-03-17 [1] CRAN (R 4.5.2)
reshape2 1.4.5 2025-11-12 [1] CRAN (R 4.5.2)
rjson 0.2.23 2024-09-16 [1] CRAN (R 4.5.2)
rlang 1.2.0 2026-04-06 [1] CRAN (R 4.5.2)
rmarkdown 2.31 2026-03-26 [1] CRAN (R 4.5.2)
rpart 4.1.24 2025-01-07 [1] CRAN (R 4.5.2)
Rraven * 1.0.16 2025-10-23 [1] CRAN (R 4.5.2)
rstan 2.32.7 2025-03-10 [1] CRAN (R 4.5.2)
rstantools 2.6.0 2026-01-10 [1] CRAN (R 4.5.2)
rstudioapi 0.18.0 2026-01-16 [1] CRAN (R 4.5.2)
Rtsne * 0.17 2023-12-07 [1] CRAN (R 4.5.2)
S7 0.2.1 2025-11-14 [1] CRAN (R 4.5.2)
scales 1.4.0 2025-04-24 [1] CRAN (R 4.5.2)
seewave 2.2.4 2025-08-19 [1] CRAN (R 4.5.2)
sessioninfo 1.2.3 2025-02-05 [1] CRAN (R 4.5.2)
signal 1.8-1 2024-06-26 [1] CRAN (R 4.5.2)
sketchy 1.0.7 2026-03-03 [1] CRANs (R 4.5.2)
StanHeaders 2.32.10 2024-07-15 [1] CRAN (R 4.5.2)
stringi 1.8.7 2025-03-27 [1] CRAN (R 4.5.2)
stringr 1.6.0 2025-11-04 [1] CRAN (R 4.5.2)
survival 3.8-6 2026-01-16 [1] CRAN (R 4.5.2)
svglite 2.2.2 2025-10-21 [1] CRAN (R 4.5.2)
svUnit 1.0.8 2025-08-26 [1] CRAN (R 4.5.2)
systemfonts 1.3.1 2025-10-01 [1] CRAN (R 4.5.2)
tensorA 0.36.2.1 2023-12-13 [1] CRAN (R 4.5.2)
testthat 3.3.2 2026-01-11 [1] CRAN (R 4.5.2)
textshaping 1.0.4 2025-10-10 [1] CRAN (R 4.5.2)
tibble 3.3.1 2026-01-11 [1] CRAN (R 4.5.2)
tidybayes 3.0.7 2024-09-15 [1] CRAN (R 4.5.2)
tidyr 1.3.2 2025-12-19 [1] CRAN (R 4.5.2)
tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.5.2)
timechange 0.4.0 2026-01-29 [1] CRAN (R 4.5.2)
timeDate 4052.112 2026-01-28 [1] CRAN (R 4.5.2)
tuneR 1.4.7 2024-04-17 [1] CRAN (R 4.5.2)
usethis 3.2.1 2025-09-06 [1] CRAN (R 4.5.2)
vctrs 0.7.3 2026-04-11 [1] CRAN (R 4.5.2)
viridis * 0.6.5 2024-01-29 [1] CRAN (R 4.5.2)
viridisLite * 0.4.3 2026-02-04 [1] CRAN (R 4.5.2)
warbleR 1.1.37 2025-10-22 [1] CRAN (R 4.5.2)
withr 3.0.2 2024-10-28 [1] CRAN (R 4.5.2)
xaringanExtra 0.8.0 2024-05-19 [1] CRAN (R 4.5.2)
xfun 0.57 2026-03-20 [1] CRAN (R 4.5.2)
xml2 1.5.2 2026-01-17 [1] CRAN (R 4.5.2)
yaml 2.3.12 2025-12-10 [1] CRAN (R 4.5.2)
[1] /home/m/R/x86_64-pc-linux-gnu-library/4.5
[2] /usr/local/lib/R/site-library
[3] /usr/lib/R/site-library
[4] /usr/lib/R/library
* ── Packages attached to the search path.
──────────────────────────────────────────────────────────────────────────────