Code
# options to customize chunk outputs
knitr::opts_chunk$set(
tidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE
)Heterospecific vocal mimicry in a bellbird
Source code and data found at https://github.com/maRce10/vocal_mimicry_bellbird
# options to customize chunk outputs
knitr::opts_chunk$set(
tidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE
)# knitr is require for creating html/pdf/word reports
# formatR is used for soft-wrapping code
# install knitr package if not installed
if (!requireNamespace("sketchy", quietly = TRUE)) {
install.packages("sketchy")
}
packages <- c(
"knitr",
"Rtsne",
"ggplot2",
"viridis",
"umap",
"xgboost",
"caret",
"doParallel",
"reshape2",
"pROC",
"smotefamily",
"brms",
"cowplot"
)
# install/ load packages
sketchy::load_packages(packages = packages)# colors and shapes by species for scatterplots
cols <- c(adjustcolor("tomato", alpha.f = 0.4), mako(4, alpha = 0.4, begin = 0.2, end = 0.8))
shapes <- c(16, 19, 15, 18, 17)
names(shapes) <- names(cols) <- c(
"mimetic_bellbird",
"Procnias tricarunculatus",
"Psarocolius montezuma",
"Ramphastos sulfuratus",
"Pteroglossus torquatus"
)
theme_set(theme_classic(base_size = 20))
# set method parameters
ctrl <- caret::trainControl(method = "repeatedcv", repeats = 5)
plot_overlap_props <- function(df, method = "mcp.overlap", formula = model_sp ~ tsne1 + tsne2){
ovlps <- space_similarity(
formula,
data = df,
nperm = 1000,
pb = FALSE,
method = "mcp.overlap",
trControl = ctrl,
tuneLength = 10,
seed = 123
)
ovlps1.2 <- data.frame(group.1 = ovlps$group.1, group.2 = ovlps$group.2, ovlp = ovlps[, 3])
ovlps2.1 <- data.frame(group.1 = ovlps$group.2, group.2 = ovlps$group.1, ovlp = ovlps[, 4])
ovlps <- rbind(ovlps1.2, ovlps2.1)
gg <- ggplot(ovlps, aes(x = group.1, y = group.2, fill = ovlp)) +
geom_tile() +
theme_bw() +
coord_equal() +
scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) +
geom_text(aes(label = round(ovlp, 2)), color = "black") +
theme_classic() +
theme(
axis.text.x = element_text(
color = "black",
size = 11,
angle = 30,
vjust = 0.8,
hjust = 0.8
)
) +
labs(x = "", y = "", fill = "Proportional\noverlap")
return(gg)
}
# to create several posterior predictive check plots out of a brms fit
custom_ppc <- function(fit, group = NULL, ndraws = 30) {
by_group <- if (!is.null(group)) {
TRUE
} else
FALSE
if (by_group)
by_group <- if (any(names(fit$data) == group)) {
TRUE
} else
FALSE
if (by_group)
by_group <-
if (is.character(fit$data[, group]) |
is.factor(fit$data[, group])) {
TRUE
} else
FALSE
if (by_group) {
ppc_dens <- pp_check(fit,
ndraws = ndraws,
type = 'dens_overlay_grouped',
group = group)
pp_mean <- pp_check(
fit,
type = "stat_grouped",
stat = "mean",
group = group,
ndraws = ndraws
) + theme_classic()
pp_scat <- pp_check(fit, type = "scatter_avg", # group = group,
ndraws = ndraws)
} else {
ppc_dens <- pp_check(fit, ndraws = ndraws, type = 'dens_overlay')
pp_mean <- pp_check(fit,
type = "stat",
stat = "mean",
ndraws = ndraws) + theme_classic()
pp_scat <- pp_check(fit, type = "scatter_avg", ndraws = ndraws)
}
pp_stat2 <- pp_check(fit, type = "stat_2d", ndraws = ndraws)
pp_plot_list <-
list(ppc_dens, pp_mean, pp_scat, pp_stat2)
pp_plot_list[c(1, 3:4)] <-
lapply(pp_plot_list[c(1, 3:4)], function(x)
x + scale_color_viridis_d(
begin = 0.3,
end = 0.8,
alpha = 0.5,
option = "mako",
) + scale_fill_viridis_d(
begin = 0.3,
end = 0.8,
alpha = 0.5,
option = "mako"
) + theme_classic())
ppc_plot <- plot_grid(plotlist = pp_plot_list, ncol = 2)
print(ppc_plot)
}Add metadata:
raw_embs <- read.csv("data/processed/birdnet_predictions_and_embeddings.csv")
bb_est <- readRDS(
"./data/processed/extended_selection_table_high_snr_models_and_allmimetic_sounds.RDS"
)
#add source
raw_embs$source <- sapply(raw_embs$sound_file, function(x) bb_est$source[paste0(attr(bb_est, "check.res")$sound.files, ".wav") == x][1])
raw_embs$model.species <- sapply(raw_embs$sound_file, function(x) bb_est$Model.species[paste0(attr(bb_est, "check.res")$sound.files, ".wav") == x][1])
raw_embs$vocalization <- sapply(raw_embs$sound_file, function(x) bb_est$Vocalization[paste0(attr(bb_est, "check.res")$sound.files, ".wav") == x][1])
raw_embs$species.vocalization <- sapply(raw_embs$sound_file, function(x) bb_est$species.vocalization[paste0(attr(bb_est, "check.res")$sound.files, ".wav") == x][1])
raw_embs$orig.sound.files <- sapply(raw_embs$sound_file, function(x) attr(bb_est, "check.res")$orig.sound.files[paste0(attr(bb_est, "check.res")$sound.files, ".wav") == x][1])
write.csv(raw_embs, "data/processed/birdnet_predictions_and_embeddings_with_metadata.csv", row.names = FALSE)Sample sizes:
embs <- read.csv("data/processed/birdnet_predictions_and_embeddings_with_metadata.csv")
# select only model species
model_sp_embs <- embs[grep("mimetic|babbling", embs$species.vocalization, invert = TRUE), ]
table(embs$source)
mimetic_bellbird Procnias tricarunculatus Psarocolius montezuma
1660 715 74
Pteroglossus torquatus Ramphastos sulfuratus
64 1272
table(embs$species.vocalization)
mimetic_P.tricarunculatus-adult mimetic_P.tricarunculatus-aggresive
250 46
mimetic_P.tricarunculatus-babbling mimetic_P.tricarunculatus-P.montezuma
177 17
mimetic_P.tricarunculatus-P.torquatus mimetic_P.tricarunculatus-R.sulfuratus
224 920
mimetic_P.tricarunculatus-whistle P.montezuma-model
26 74
P.torquatus-model P.tricarunculatus-babbling
64 16
P.tricarunculatus-Monteverde P.tricarunculatus-Nicaragua
287 225
P.tricarunculatus-Panama P.tricarunculatus-Talamanca
55 132
R.sulfuratus-model
1272
# Find column indices that match pattern
embedding_columns <- grep("^logit_", names(embs), value = TRUE)
# Check results
cat("Number of embedding columns:", length(embedding_columns), "\n")Number of embedding columns: 6522
Perplexity = 30
tsne_out <- Rtsne(as.matrix(model_sp_embs[, embedding_columns]))
tsne_model_species <- data.frame(model_sp = model_sp_embs$source, tsne1 = tsne_out$Y[,1], tsne2 = tsne_out$Y[,2])
saveRDS(tsne_model_species, "data/processed/tsne_dims_model_species.rds")tsne_only_model_species <- readRDS("data/processed/tsne_dims_model_species.rds")
ggplot(tsne_only_model_species, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
geom_point(size = 2) +
theme_minimal() +
labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
scale_color_manual(values = cols[names(cols) %in% unique(tsne_only_model_species$model_sp)]) +
scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_only_model_species$model_sp)]) +
theme_classic()Perplexity = 5
T-sne including mimetic bellbird
embs_mimetic_and_models <- embs[grep("babbling", embs$species.vocalization, invert = TRUE), ]
tsne_out <- Rtsne(as.matrix(embs_mimetic_and_models[, embedding_columns]), num_threads = 30, perplexity = 5)
tsne_all <- data.frame(model_sp = embs_mimetic_and_models$source, sp.vocalization = embs_mimetic_and_models$species.vocalization, tsne1 = tsne_out$Y[,1], tsne2 = tsne_out$Y[,2])
saveRDS(tsne_all, "data/processed/tsne_dims_all_low_perplexity.rds")Plotting only model species
tsne_all <- readRDS("data/processed/tsne_dims_all_low_perplexity.rds")
tsne_model_species <- tsne_all[grep("mimetic|babbling", tsne_all$sp.vocalization, invert = TRUE), ]
ggplot(tsne_model_species, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
geom_point(size = 2) +
theme_minimal() +
labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
scale_color_manual(values = cols[names(cols) %in% unique(tsne_model_species$model_sp)]) +
scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_model_species$model_sp)]) +
theme_classic()Including mimetic bellbird
ggplot(tsne_all, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
geom_point(size = 2) +
theme_minimal() +
labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
scale_color_manual(values = cols[names(cols) %in% unique(tsne_all$model_sp)]) +
scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_all$model_sp)]) +
theme_classic()Perplexity = 30
T-sne including mimetic bellbird
embs_mimetic_and_models <- embs[grep("babbling", embs$species.vocalization, invert = TRUE), ]
tsne_out <- Rtsne(as.matrix(embs_mimetic_and_models[, embedding_columns]), num_threads = 30, perplexity = 30)
tsne_all <- data.frame(model_sp = embs_mimetic_and_models$source, sp.vocalization = embs_mimetic_and_models$species.vocalization, tsne1 = tsne_out$Y[,1], tsne2 = tsne_out$Y[,2])
saveRDS(tsne_all, "data/processed/tsne_dims_all_mid_perplexity.rds")Plotting only model species
tsne_all <- readRDS("data/processed/tsne_dims_all_mid_perplexity.rds")
tsne_model_species <- tsne_all[grep("mimetic|babbling", tsne_all$sp.vocalization, invert = TRUE), ]
ggplot(tsne_model_species, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
geom_point(size = 2) +
theme_minimal() +
labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
scale_color_manual(values = cols[names(cols) %in% unique(tsne_model_species$model_sp)]) +
scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_model_species$model_sp)]) +
theme_classic()Including mimetic bellbird
ggplot(tsne_all, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
geom_point(size = 2) +
theme_minimal() +
labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
scale_color_manual(values = cols[names(cols) %in% unique(tsne_all$model_sp)]) +
scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_all$model_sp)]) +
theme_classic()Perplexity = 50
T-sne including mimetic bellbird
embs_mimetic_and_models <- embs[grep("babbling", embs$species.vocalization, invert = TRUE), ]
tsne_out <- Rtsne(as.matrix(embs_mimetic_and_models[, embedding_columns]), num_threads = 30, perplexity = 50)
tsne_all <- data.frame(
model_sp = embs_mimetic_and_models$source,
sp.vocalization = embs_mimetic_and_models$species.vocalization,
tsne1 = tsne_out$Y[, 1],
tsne2 = tsne_out$Y[, 2]
)
saveRDS(tsne_all, "data/processed/tsne_dims_all_high_perplexity.rds")Plotting only model species
tsne_all <- readRDS("data/processed/tsne_dims_all_high_perplexity.rds")
tsne_model_species <- tsne_all[grep("mimetic|babbling", tsne_all$sp.vocalization, invert = TRUE), ]
ggplot(tsne_model_species, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
geom_point(size = 2) +
theme_minimal() +
labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
scale_color_manual(values = cols[names(cols) %in% unique(tsne_model_species$model_sp)]) +
scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_model_species$model_sp)]) +
theme_classic()Including mimetic bellbird
ggplot(tsne_all, aes(x = tsne1, y = tsne2, color = model_sp, shape = model_sp)) +
geom_point(size = 2) +
theme_minimal() +
labs(x = "t-SNE 1", y = "t-SNE 2", color = "", shape = "") +
scale_color_manual(values = cols[names(cols) %in% unique(tsne_all$model_sp)]) +
scale_shape_manual(values = shapes[names(shapes) %in% unique(tsne_all$model_sp)]) +
theme_classic()# Configure UMAP
# n_neighbors is the most important parameter (similar to t-SNE perplexity)
umap_config <- umap.defaults
umap_config$n_neighbors <- 15 # Default is 15, good starting point
umap_config$min_dist <- 0.1 # Minimum distance between points (default 0.1)
umap_config$n_components <- 2 # 2D projection
umap_config$random_state <- 42 # For reproducibility
# Run UMAP
set.seed(42)
embs_mimetic_and_models <- embs[grep("babbling", embs$species.vocalization, invert = TRUE), ]
umap_result <- umap(
as.matrix(embs_mimetic_and_models[, embedding_columns]),
config = umap_config
)
# Create dataframe for plotting
umap_all <- data.frame(
model_sp = embs_mimetic_and_models$source,
sp.vocalization = embs_mimetic_and_models$species.vocalization,
umap1 = umap_result$layout[, 1],
umap2 = umap_result$layout[, 2]
)
saveRDS(umap_all, "data/processed/umap_dims_all.rds")Plotting only model species
umap_all <- readRDS("data/processed/umap_dims_all.rds")
umap_model_species <- umap_all[grep("mimetic|babbling", umap_all$sp.vocalization, invert = TRUE), ]
ggplot(umap_model_species, aes(x = umap1, y = umap2, color = model_sp, shape = model_sp)) +
geom_point(size = 2) +
theme_minimal() +
labs(x = "UMAP 1", y = "UMAP 2", color = "", shape = "") +
scale_color_manual(values = cols[names(cols) %in% unique(umap_model_species$model_sp)]) +
scale_shape_manual(values = shapes[names(shapes) %in% unique(umap_model_species$model_sp)]) +
theme_classic()ggplot(umap_all, aes(x = umap1, y = umap2, color = model_sp, shape = model_sp)) +
geom_point(size = 2) +
theme_minimal() +
labs(x = "UMAP 1", y = "UMAP 2", color = "", shape = "") +
scale_color_manual(values = cols[names(cols) %in% unique(umap_all$model_sp)]) +
scale_shape_manual(values = shapes[names(shapes) %in% unique(umap_all$model_sp)]) +
theme_classic()# Separate features and labels
model_sp_embs <- embs[grep("mimetic|babbling", embs$species.vocalization, invert = TRUE), ]
X <- as.matrix(model_sp_embs[, embedding_columns])
y <- model_sp_embs$source
# Convert to factor (ensure consistent levels)
y <- as.factor(y)
cat("Original class distribution:\n")Original class distribution:
table(y)y
Procnias tricarunculatus Psarocolius montezuma Pteroglossus torquatus
699 74 64
Ramphastos sulfuratus
1272
# use caret to create a 80-20 split of the data for training and testing the model, stratified by source + sound file (species), keeping entire orig.sound.files levels for testing
unique_orig_sound_files <- as.data.frame(model_sp_embs[
!duplicated(model_sp_embs$orig.sound.files),
c("orig.sound.files", "source")
])
set.seed(123)
# exclude pteroglossus to add it manually
train_sound_files <- createDataPartition(
unique_orig_sound_files$source,
p = .8,
list = FALSE,
times = 1
)
train_sound_files <- train_sound_files[unique_orig_sound_files$source[train_sound_files] != "Pteroglossus torquatus"]
# leave c("Campan_0128.wav", "Campan_0129.wav") for testing
train_sound_files <- c(train_sound_files, which(unique_orig_sound_files$orig.sound.files %in% c("20200805-062852.wav", "20200805-063013.wav", "20200805-063224.wav", "Campan_0003.wav", "Campan_0127.wav", "Pteroglossus-torquatus-154157.wav", "Pteroglossus-torquatus-439335.wav", "Pteroglossus-torquatus-489619.wav", "Pteroglossus-torquatus-526136.wav")))
train_index <- which(model_sp_embs$orig.sound.files %in%
unique_orig_sound_files$orig.sound.files[train_sound_files])
X_train <- X[train_index, ]
X_test <- X[-train_index, ]
y_train <- y[train_index]
y_test <- y[-train_index]
cat("\nTraining set size:", nrow(X_train))
Training set size: 1719
cat("\nTest set size:", nrow(X_test))
Test set size: 390
cat("\n\nTraining class distribution:\n")
Training class distribution:
print(table(y_train))y_train
Procnias tricarunculatus Psarocolius montezuma Pteroglossus torquatus
566 64 56
Ramphastos sulfuratus
1033
# Calculate mean and SD from training set only (to avoid data leakage)
train_mean <- colMeans(X_train)
train_sd <- apply(X_train, 2, sd)
# Standardize both sets
X_train_scaled <- scale(X_train, center = train_mean, scale = train_sd)
X_test_scaled <- scale(X_test, center = train_mean, scale = train_sd)
# Check for any features with zero variance and remove them
zero_var <- which(train_sd == 0 | is.na(train_sd))
if(length(zero_var) > 0) {
cat("\nRemoving", length(zero_var), "zero-variance features\n")
X_train_scaled <- X_train_scaled[, -zero_var]
X_test_scaled <- X_test_scaled[, -zero_var]
}train_data <- data.frame(X_train_scaled, source = gsub(" ", "_", as.character(y_train)))
# k nearest neighbor with caret
set.seed(123)
ctrl <- trainControl(
method = "repeatedcv",
number = 10,
repeats = 5,
sampling = "down", # this handles imbalance
classProbs = TRUE
)
set.seed(123)
knn_model <- train(
source ~ .,
data = train_data,
method = "knn",
trControl = ctrl,
tuneLength = 20
)
saveRDS(knn_model, "data/processed/knn_model.rds")knn_model <- readRDS("data/processed/knn_model.rds")
test_data <- data.frame(X_test_scaled, source = gsub(" ", "_", as.character(y_test)))
pred_test <- predict(knn_model, newdata = test_data)
y_test <- gsub(" ", "_", as.character(y_test))
conf_mat <- confusionMatrix(pred_test, as.factor(y_test))
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x)
sum(y_test ==
x))
conf_df$proportion <- conf_df$Freq / conf_df$total
conf_df <- conf_df[complete.cases(conf_df), ]
conf_df$Reference <- gsub("_", " ", conf_df$Reference)
conf_df$Prediction <- gsub("_", " ", conf_df$Prediction)
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) + geom_text(aes(label = round(proportion, 2)), color = "black") + labs(x = "Reference model species", y = "Predicted model species") +
theme_classic() + theme(axis.text.x = element_text(
color = "black",
size = 11,
angle = 30,
vjust = 0.8,
hjust = 0.8
))embs_mimetic_and_models <- embs[grep("babbling", embs$species.vocalization, invert = TRUE), ]
X_mimic_labeled <- embs_mimetic_and_models[embs_mimetic_and_models$source == "mimetic_bellbird" & embs_mimetic_and_models$model.species != "", ]
X_mimic <- X_mimic_labeled[, embedding_columns]
X_mimic_scaled <- scale(X_mimic, center = train_mean, scale = train_sd)
pred_mimic <- X_mimic_labeled$pred_mimic <- predict(knn_model, newdata = X_mimic_scaled)
## ggplot bars for predicted species
ggplot(data.frame(pred_mimic), aes(x = pred_mimic)) +
geom_bar(fill = mako(10)[9]) +
theme_classic() +
labs(x = "Predicted model species", y = "Count") +
theme(axis.text.x = element_text(
color = "black",
size = 11,
angle = 30,
vjust = 0.8,
hjust = 0.8
)) +
scale_x_discrete(labels = function(x) gsub("_", " ", x))y_mimic <- X_mimic_labeled$model.species
y_mimic <- gsub(" ", "_", as.character(y_mimic))
conf_mat <- confusionMatrix(pred_mimic, as.factor(y_mimic))
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <- sapply(conf_df$Reference, function(x)
sum(y_mimic ==
x))
conf_df$proportion <- conf_df$Freq / conf_df$total
conf_df <- conf_df[complete.cases(conf_df), ]
conf_df$Reference <- gsub("_", " ", conf_df$Reference)
conf_df$Prediction <- gsub("_", " ", conf_df$Prediction)
ggplot(conf_df, aes(x = Reference, y = Prediction, fill = proportion)) +
geom_tile() + theme_bw() + coord_equal() + scale_fill_gradient2(low = mako(10)[3], high = mako(10)[7]) + geom_text(aes(label = round(proportion, 2)), color = "black") + labs(x = "Reference model species", y = "Predicted model species") +
theme_classic() + theme(axis.text.x = element_text(
color = "black",
size = 11,
angle = 30,
vjust = 0.8,
hjust = 0.8
))Proportion of predicted model species per recording date, including only dates with at least 10 recordings of model species (to avoid noise from small sample sizes)
embs_babling <- embs[grepl("babbling", embs$species.vocalization) & embs$source == "mimetic_bellbird", ]
embs_babling$pred_mimic <- "babbling"
mimic_data <- rbind(embs_babling[, names(X_mimic_labeled)], X_mimic_labeled)
rec_info <- read.table("./data/raw/Recording_dates_.csv", head = TRUE, sep = ";")
rec_info$date <- gsub("\\/", "-", rec_info$date)
rec_info$date <- gsub("out", "ago", rec_info$date)
rec_info$date <- gsub("set", "sep", rec_info$date)
rec_info$org.date <- rec_info$date
rec_info$date <- as.Date(rec_info$date, format = "%d-%b-%y")
mimic_data$date <- sapply(mimic_data$orig.sound.files, function(x) as.character(rec_info$date[rec_info$sound.files ==
x][1]))
mimic_data$date <- as.Date(mimic_data$date)
agg_pred <- aggregate(orig.sound.files ~ date + pred_mimic, mimic_data, length)
out <- lapply(unique(agg_pred$date), function(x) {
X <- agg_pred[agg_pred$date == x, ]
X$prop <- X$orig.sound.files/sum(X$orig.sound.files)
if (length(unique(X$pred_mimic)) < 4) {
new_df <- data.frame(date = X$date[1], pred_mimic = setdiff(c("Procnias_tricarunculatus",
"Pteroglossus_torquatus", "Ramphastos_sulfuratus", "Psarocolius_montezuma", "babbling"), X$pred_mimic),
orig.sound.files = 0, prop = 0)
X <- rbind(new_df, X)
}
X$total <- sum(X$orig.sound.files)
return(X)
})
prop_pred <- do.call(rbind, out)
prop_pred$date.fact <- as.factor(as.character(prop_pred$date))
count_date <- aggregate(orig.sound.files ~ date, prop_pred, sum)
ggplot(prop_pred[prop_pred$date %in% count_date$date[count_date$orig.sound.files >=
10], ], aes(x = date.fact, y = prop, fill = pred_mimic)) + geom_bar(stat = "identity") +
scale_fill_viridis_d(alpha = 0.7, end = 0.8, option = "G") + theme_classic() + theme(axis.text.x = element_text(angle = 45,
vjust = 0.5))Excluding P. montezuma, which is the only model species that is rarely predicted in the mimetic bellbird vocalizations
ggplot(
prop_pred[
prop_pred$date %in%
count_date$date[count_date$orig.sound.files >= 10] &
prop_pred$pred_mimic != "Psarocolius_montezuma",
],
aes(x = date.fact, y = prop, fill = pred_mimic)
) +
geom_bar(stat = "identity") +
scale_fill_viridis_d(alpha = 0.7, end = 0.8, option = "G") +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5))# weakly informative priors
priors <- c(
# Weakly informative logistic intercept
prior(normal(0, 2.5), class = "Intercept"),
# Temporal slope
prior(normal(0, 1), class = "b"),
# Random effect standard deviations
prior(exponential(1), class = "sd")
)
dat <- prop_pred[prop_pred$pred_mimic != "Psarocolius_montezuma",]
dat$julian_day <- as.numeric(format(as.Date(dat$date), "%j"))
fit <- brm(
orig.sound.files | trials(total) ~
scale(julian_day) +
(1 + scale(julian_day) | pred_mimic),
family = binomial,
data = dat,
prior = priors,
chains = 4,
cores = 4,
iter = 4000,
control = list(adapt_delta = 0.95), file_refit = "on_change",
file = "data/processed/brms_model_prop_pred_mimicry.rds"
)
null_priors <- c(
# Weakly informative logistic intercept
prior(normal(0, 2.5), class = "Intercept"),
# Random effect standard deviations
prior(exponential(1), class = "sd")
)
null_fit <- brm(
orig.sound.files | trials(total) ~
1 +
(1 | pred_mimic),
family = binomial,
data = dat,
prior = null_priors,
chains = 4,
cores = 4,
iter = 4000,
control = list(adapt_delta = 0.95),
file_refit = "on_change",
file = "data/processed/brms_null_model_prop_pred_mimicry.rds"
)fit <- readRDS("data/processed/brms_model_prop_pred_mimicry.rds")
# Extract posterior draws as base R data.frame
post <- as.data.frame(fit)
# Get species levels
species_levels <- unique(fit$data$pred_mimic)
# Global slope
beta_global <- post[, "b_scalejulian_day"]
# Find all random slope columns automatically
rand_cols <- grep("^r_pred_mimic\\[.*,scalejulian_day\\]$",
names(post),
value = TRUE)
# Extract species names from column names
species_levels <- sub("r_pred_mimic\\[(.*),scalejulian_day\\]", "\\1", rand_cols)
# Storage matrix
results <- matrix(NA, nrow = length(species_levels), ncol = 3)
colnames(results) <- c("mean_slope",
"l95",
"u95")
rownames(results) <- species_levels
# Loop through species
for (i in seq_along(rand_cols)) {
slope_draws <- beta_global + post[, rand_cols[i]]
results[i, "mean_slope"] <- mean(slope_draws)
results[i, "l95"] <- quantile(slope_draws, 0.025)
results[i, "u95"] <- quantile(slope_draws, 0.975)
}
# Print results
print(round(results, 4)) mean_slope l95 u95
babbling -0.0700 -0.2415 0.0987
Procnias_tricarunculatus 1.1138 0.8618 1.3894
Pteroglossus_torquatus 0.1652 0.0068 0.3295
Ramphastos_sulfuratus -0.3854 -0.5018 -0.2693
dat <- prop_pred[prop_pred$pred_mimic != "Psarocolius_montezuma",]
dat$julian_day <- as.numeric(format(as.Date(dat$date), "%j"))
# Create prediction grid
newdata <- expand.grid(
julian_day = seq(min(dat$julian_day),
max(dat$julian_day),
length.out = 100),
pred_mimic = unique(dat$pred_mimic)
)
# IMPORTANT: match scaling used in model
newdata$scalejulian_day <- scale(dat$julian_day)[1] # dummy to get attributes
newdata$scalejulian_day <- (newdata$julian_day - mean(dat$julian_day)) /
sd(dat$julian_day)
newdata$total <- round(mean(dat$total), 0) # use mean total for predictions
# Get posterior expected probabilities
epred <- posterior_epred(
fit,
newdata = newdata,
re_formula = NULL
)
# Posterior mean prediction
newdata$fit <- colMeans(epred)
# Convert julian day back to date for plotting
origin_year <- format(as.Date(dat$date[1]), "%Y")
newdata$date <- as.Date(newdata$julian_day - 1,
origin = paste0(origin_year, "-01-01"))
newdata$fit_prop <- newdata$fit / 100
# include only those for which CI did not include 0
results_df <- as.data.frame(results)
sig_spp <- rownames(results_df)[results_df$l95 * results_df$u95 > 0]
newdata <- newdata[newdata$pred_mimic %in% sig_spp, ]
# Plot
ggplot(
dat[dat$prop > 0, ],
aes(
x = as.Date(date),
y = prop,
fill = pred_mimic,
col = pred_mimic,
size = orig.sound.files
)
) +
geom_point(pch = 21) +
geom_line(
data = newdata,
aes(x = date, y = fit_prop, color = pred_mimic),
size = 1.2
) +
scale_fill_viridis_d(alpha = 0.6, end = 0.8) +
scale_color_viridis_d(alpha = 0.8, end = 0.8) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) +
labs(
x = "Date",
y = "Proportion of vocalizations",
fill = "Predicted species",
color = "Predicted species",
size = "Number of recorded\nvocalizations"
)Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.5.2 (2025-10-31)
os Ubuntu 22.04.4 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Costa_Rica
date 2026-03-05
pandoc 3.6.3 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
quarto 1.8.25 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/quarto
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
abind 1.4-8 2024-09-12 [1] CRAN (R 4.5.2)
askpass 1.2.1 2024-10-04 [1] CRAN (R 4.5.2)
backports 1.5.0 2024-05-23 [1] CRAN (R 4.5.2)
bayesplot 1.15.0 2025-12-12 [1] CRAN (R 4.5.2)
bridgesampling 1.2-1 2025-11-19 [1] CRAN (R 4.5.2)
brms * 2.23.0 2025-09-09 [1] CRAN (R 4.5.2)
Brobdingnag 1.2-9 2022-10-19 [1] CRAN (R 4.5.2)
cachem 1.1.0 2024-05-16 [1] CRAN (R 4.5.2)
caret * 7.0-1 2024-12-10 [1] CRAN (R 4.5.2)
checkmate 2.3.4 2026-02-03 [1] CRAN (R 4.5.2)
class 7.3-23 2025-01-01 [1] CRAN (R 4.5.2)
cli 3.6.5 2025-04-23 [1] CRAN (R 4.5.2)
coda 0.19-4.1 2024-01-31 [1] CRAN (R 4.5.2)
codetools 0.2-20 2024-03-31 [1] CRAN (R 4.5.2)
cowplot * 1.2.0 2025-07-07 [1] CRAN (R 4.5.2)
crayon 1.5.3 2024-06-20 [1] CRAN (R 4.5.2)
data.table 1.18.2.1 2026-01-27 [1] CRAN (R 4.5.2)
dbscan 1.2.4 2025-12-19 [1] CRAN (R 4.5.2)
devtools 2.4.6 2025-10-03 [1] CRAN (R 4.5.2)
digest 0.6.39 2025-11-19 [1] CRAN (R 4.5.2)
distributional 0.6.0 2026-01-14 [1] CRAN (R 4.5.2)
doParallel * 1.0.17 2022-02-07 [1] CRAN (R 4.5.2)
dplyr 1.2.0 2026-02-03 [1] CRAN (R 4.5.2)
e1071 1.7-17 2025-12-18 [1] CRAN (R 4.5.2)
ellipsis 0.3.2 2021-04-29 [3] CRAN (R 4.1.1)
evaluate 1.0.5 2025-08-27 [1] CRAN (R 4.5.2)
farver 2.1.2 2024-05-13 [1] CRAN (R 4.5.2)
fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.5.2)
FNN 1.1.4.1 2024-09-22 [1] CRAN (R 4.5.2)
foreach * 1.5.2 2022-02-02 [3] CRAN (R 4.1.2)
fs 1.6.6 2025-04-12 [1] CRAN (R 4.5.2)
future 1.69.0 2026-01-16 [1] CRAN (R 4.5.2)
future.apply 1.20.2 2026-02-20 [1] CRAN (R 4.5.2)
generics 0.1.4 2025-05-09 [1] CRAN (R 4.5.2)
ggplot2 * 4.0.2 2026-02-03 [1] CRAN (R 4.5.2)
globals 0.19.0 2026-02-02 [1] CRAN (R 4.5.2)
glue 1.8.0 2024-09-30 [1] CRAN (R 4.5.2)
gower 1.0.2 2024-12-17 [1] CRAN (R 4.5.2)
gridExtra 2.3 2017-09-09 [3] CRAN (R 4.0.1)
gtable 0.3.6 2024-10-25 [1] CRAN (R 4.5.2)
hardhat 1.4.2 2025-08-20 [1] CRAN (R 4.5.2)
htmltools 0.5.9 2025-12-04 [1] CRAN (R 4.5.2)
htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.5.2)
igraph 2.2.2 2026-02-12 [1] CRAN (R 4.5.2)
inline 0.3.21 2025-01-09 [1] CRAN (R 4.5.2)
ipred 0.9-15 2024-07-18 [1] CRAN (R 4.5.2)
iterators * 1.0.14 2022-02-05 [3] CRAN (R 4.1.2)
jsonlite 2.0.0 2025-03-27 [1] CRAN (R 4.5.2)
knitr * 1.51 2025-12-20 [1] CRAN (R 4.5.2)
labeling 0.4.3 2023-08-29 [1] CRAN (R 4.5.2)
lattice * 0.22-9 2026-02-09 [1] CRAN (R 4.5.2)
lava 1.8.2 2025-10-30 [1] CRAN (R 4.5.2)
lifecycle 1.0.5 2026-01-08 [1] CRAN (R 4.5.2)
listenv 0.10.0 2025-11-02 [1] CRAN (R 4.5.2)
loo 2.9.0 2025-12-23 [1] CRAN (R 4.5.2)
lubridate 1.9.5 2026-02-04 [1] CRAN (R 4.5.2)
magrittr 2.0.4 2025-09-12 [1] CRAN (R 4.5.2)
MASS 7.3-65 2025-02-28 [1] CRAN (R 4.5.2)
Matrix 1.7-4 2025-08-28 [1] CRAN (R 4.5.2)
matrixStats 1.5.0 2025-01-07 [1] CRAN (R 4.5.2)
memoise 2.0.1 2021-11-26 [3] CRAN (R 4.1.2)
ModelMetrics 1.2.2.2 2020-03-17 [3] CRAN (R 4.0.1)
mvtnorm 1.3-3 2025-01-10 [1] CRAN (R 4.5.2)
nlme 3.1-168 2025-03-31 [1] CRAN (R 4.5.2)
nnet 7.3-20 2025-01-01 [1] CRAN (R 4.5.2)
openssl 2.3.5 2026-02-26 [1] CRAN (R 4.5.2)
otel 0.2.0 2025-08-29 [1] CRAN (R 4.5.2)
packrat 0.9.3 2025-06-16 [1] CRAN (R 4.5.2)
parallelly 1.46.1 2026-01-08 [1] CRAN (R 4.5.2)
pillar 1.11.1 2025-09-17 [1] CRAN (R 4.5.2)
pkgbuild 1.4.8 2025-05-26 [1] CRAN (R 4.5.2)
pkgconfig 2.0.3 2019-09-22 [3] CRAN (R 4.0.1)
pkgload 1.5.0 2026-02-03 [1] CRAN (R 4.5.2)
plyr 1.8.9 2023-10-02 [1] CRAN (R 4.5.2)
png 0.1-8 2022-11-29 [1] CRAN (R 4.5.2)
posterior 1.6.1 2025-02-27 [1] CRAN (R 4.5.2)
pROC * 1.19.0.1 2025-07-31 [1] CRAN (R 4.5.2)
prodlim 2025.04.28 2025-04-28 [1] CRAN (R 4.5.2)
proxy 0.4-29 2025-12-29 [1] CRAN (R 4.5.2)
purrr 1.2.1 2026-01-09 [1] CRAN (R 4.5.2)
QuickJSR 1.9.0 2026-01-25 [1] CRAN (R 4.5.2)
R6 2.6.1 2025-02-15 [1] CRAN (R 4.5.2)
RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.5.2)
Rcpp * 1.1.1 2026-01-10 [1] CRAN (R 4.5.2)
RcppParallel 5.1.11-2 2026-03-05 [1] CRAN (R 4.5.2)
recipes 1.3.1 2025-05-21 [1] CRAN (R 4.5.2)
remotes 2.5.0 2024-03-17 [1] CRAN (R 4.5.2)
reshape2 * 1.4.5 2025-11-12 [1] CRAN (R 4.5.2)
reticulate 1.45.0 2026-02-13 [1] CRAN (R 4.5.2)
rlang 1.1.7 2026-01-09 [1] CRAN (R 4.5.2)
rmarkdown 2.30 2025-09-28 [1] CRAN (R 4.5.2)
rpart 4.1.24 2025-01-07 [1] CRAN (R 4.5.2)
RSpectra 0.16-2 2024-07-18 [1] CRAN (R 4.5.2)
rstan 2.32.7 2025-03-10 [1] CRAN (R 4.5.2)
rstantools 2.6.0 2026-01-10 [1] CRAN (R 4.5.2)
rstudioapi 0.18.0 2026-01-16 [1] CRAN (R 4.5.2)
Rtsne * 0.17 2023-12-07 [1] CRAN (R 4.5.2)
S7 0.2.1 2025-11-14 [1] CRAN (R 4.5.2)
scales 1.4.0 2025-04-24 [1] CRAN (R 4.5.2)
sessioninfo 1.2.3 2025-02-05 [1] CRAN (R 4.5.2)
sketchy 1.0.7 2026-03-03 [1] CRANs (R 4.5.2)
smotefamily * 1.4.0 2024-03-14 [1] CRAN (R 4.5.2)
StanHeaders 2.32.10 2024-07-15 [1] CRAN (R 4.5.2)
stringi 1.8.7 2025-03-27 [1] CRAN (R 4.5.2)
stringr 1.6.0 2025-11-04 [1] CRAN (R 4.5.2)
survival 3.8-6 2026-01-16 [1] CRAN (R 4.5.2)
tensorA 0.36.2.1 2023-12-13 [1] CRAN (R 4.5.2)
tibble 3.3.1 2026-01-11 [1] CRAN (R 4.5.2)
tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.5.2)
timechange 0.4.0 2026-01-29 [1] CRAN (R 4.5.2)
timeDate 4052.112 2026-01-28 [1] CRAN (R 4.5.2)
umap * 0.2.10.0 2023-02-01 [1] CRAN (R 4.5.2)
usethis 3.2.1 2025-09-06 [1] CRAN (R 4.5.2)
vctrs 0.7.1 2026-01-23 [1] CRAN (R 4.5.2)
viridis * 0.6.5 2024-01-29 [1] CRAN (R 4.5.2)
viridisLite * 0.4.3 2026-02-04 [1] CRAN (R 4.5.2)
withr 3.0.2 2024-10-28 [1] CRAN (R 4.5.2)
xaringanExtra 0.8.0 2024-05-19 [1] CRAN (R 4.5.2)
xfun 0.56 2026-01-18 [1] CRAN (R 4.5.2)
xgboost * 3.2.0.1 2026-02-10 [1] CRAN (R 4.5.2)
yaml 2.3.12 2025-12-10 [1] CRAN (R 4.5.2)
[1] /home/m/R/x86_64-pc-linux-gnu-library/4.5
[2] /usr/local/lib/R/site-library
[3] /usr/lib/R/site-library
[4] /usr/lib/R/library
* ── Packages attached to the search path.
──────────────────────────────────────────────────────────────────────────────