## add 'developer' to packages to be installed from github
x <- c("data.table", "lubridate", "devtools", "maRce10/warbleR", "readxl", "ranger", "caret", "e1071", "pbapply", "viridis", "ggplot2", "DT", "kableExtra", "rlang", "Sim.DiffProc", "soundgen", "brms", "ggrepel", "posterior", "ggridges", "tidybayes", "purrr" #, "markovchain", "igraph", "TraMineR", "spgs"
)
aa <- lapply(x, function(y) {
# get pakage name
pkg <- strsplit(y, "/")[[1]]
pkg <- pkg[length(pkg)]
# check if installed, if not then install
if (!pkg %in% installed.packages()[,"Package"]) {
if (grepl("/", y)) devtools::install_github(y, force = TRUE) else
install.packages(y)
}
# load package
a <- try(require(pkg, character.only = T), silent = T)
if (!a) remove.packages(pkg)
})
warbleR_options(wl = 300, parallel = 1, bp = "frange", fast = TRUE, threshold = 15, ovlp = 20)
opts_knit$set(root.dir = "..")
theme_set(theme_classic(base_size = 24))
# set evaluation false
opts_chunk$set( fig.width = 7, fig.height = 4, warning = FALSE, message = FALSE, tidy = FALSE)
cols <- viridis(10, alpha = 0.7)
# print results brms models
summary_brm_model <- function(x, gsub.pattern = NULL, gsub.replacement = NULL){
summ <- summary(x)$fixed
fit <- x$fit
betas <- grep("^b_", names(fit@sim$samples[[1]]), value = TRUE)
hdis <- t(sapply(betas, function(y) hdi(fit@sim$samples[[1]][[y]]))
)
summ_table <- data.frame(summ, hdis, iterations = attr(fit, "stan_args")[[1]]$iter, chains = length(attr(fit, "stan_args")))
summ_table <- summ_table[rownames(summ_table) != "Intercept", c("Estimate", "Rhat", "Bulk_ESS", "l.95..CI", "u.95..CI", "iterations", "chains")]
summ_table <- as.data.frame(summ_table)
summ_table$Rhat <- round(summ_table$Rhat, digits = 3)
summ_table$CI_low <- round(unlist(summ_table$l.95..CI), digits = 3)
summ_table$CI_high <- round(unlist(summ_table$u.95..CI), digits = 3)
summ_table$l.95..CI <- summ_table$u.95..CI <- NULL
out <- lapply(betas, function(y) data.frame(variable = y, value = sort(fit@sim$samples[[1]][[y]], decreasing = FALSE)))
posteriors <- do.call(rbind, out)
posteriors <- posteriors[posteriors$variable != "b_Intercept", ]
if (!is.null(gsub.pattern) & !is.null(gsub.replacement))
posteriors$variable <- gsub(pattern = gsub.pattern, replacement = gsub.replacement, posteriors$variable)
gg <- ggplot(data = posteriors, aes(y = variable, x = value, fill = stat(quantile))) +
geom_density_ridges_gradient(quantile_lines = TRUE, quantile_fun = HDInterval::hdi, vline_linetype = 2) +
theme_classic() +
xlim(range(c(min(summ_table[ , "CI_low"]), max(summ_table[ , "CI_high"])), 0)) +
geom_vline(xintercept = 0, col = "red") +
scale_fill_manual(values = c("transparent", "lightblue", "transparent"), guide = "none") + ggtitle(x$formula)
if (!is.null(gsub.pattern) & !is.null(gsub.replacement))
rownames(summ_table) <- gsub(pattern = gsub.pattern, replacement = gsub.replacement, rownames(summ_table))
summ_table$Rhat <- ifelse(summ_table$Rhat > 1.05, cell_spec(summ_table$Rhat, "html", color ="white", background = "red", bold = TRUE, font_size = 12), cell_spec(summ_table$Rhat, "html"))
signif <- summ_table[,"CI_low"] * summ_table[,"CI_high"] > 0
df1 <- kbl(summ_table, row.names = TRUE, escape = FALSE, format = "html", digits = 3)
df1 <- row_spec(kable_input = df1,row = which(summ_table$CI_low * summ_table$CI_high > 0), background = adjustcolor(cols[9], alpha.f = 0.3))
df1 <- kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)
print(df1)
print(gg)
}
clls <- readRDS("./data/processed/curated_extended_selection_table_inquiry_calls_2020_&_2021.RDS")
sub_clls <- clls[!duplicated(clls$org.sound.files), ]
sub_clls$file.duration <- sub_clls$sound.file.samples / (sub_clls$sample.rate * 1000)
metadat <- read.csv("./data/processed/metadata_inquiry_calls_2020_&_2021.csv", stringsAsFactors = FALSE)
caps <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_2.xlsx", sheet = "Capturas"))
# read acoustic parameter data
acous_param_l <- readRDS("./data/processed/acoustic_parameters_all_groups_specific_warbler_acoustic_measurements_curated_data_2020_&_2021.RDS")
# read as RDS
agg_pred <- readRDS("./data/processed/predicted_individual_in_group_flights_2020_&_2021.RDS")
# read diagnostics
diagnostics <- readRDS("./data/processed/random_forests_diagnostics_solo_flight.RDS")
# remove groups with individuals with no solo flight
diagnostics$all_with_solo <- sapply(1:nrow(diagnostics), function(x)
{
Y <- acous_param_l[[diagnostics$group[x]]]
Y$sound.files <- sapply(Y$sound.files, function(x) strsplit(x, ".wav")[[1]][1])
Y$year.audio <- sapply(Y$sound.files, function(x) sub_clls$year.audio[gsub(".wav", "", sub_clls$org.sound.files) == x][1])
indivs <- strsplit(na.omit(Y$indiv[Y$year.audio == diagnostics$group_flight_files[x]])[1], split = "\\|")[[1]]
call_per_indiv <- sapply(indivs, function(y) sum(clls$Individuo == y & clls$Experimento == "vuelo solo"))
if (any(call_per_indiv == 0)) return("missing solo") else
return("OK")
}
)
diagnostics <- diagnostics[diagnostics$all_with_solo == "OK", ]
agg_pred <-agg_pred[agg_pred$group %in% diagnostics$group, ]
# get summary by group
summary_by_group_list <- lapply(unique(agg_pred$group), function(x) {
# print(x)
Y <- agg_pred[agg_pred$group == x, ]
#total number of calls above lowest group threshold for true positives
above_threshold_calls <- sum(Y$max_prob > diagnostics$min_prob_threshold[diagnostics$group == x])
#proportion of calls
prop_above_calls <- above_threshold_calls / nrow(Y)
return(data.frame(group = x, experiment = diagnostics$experiment[diagnostics$group == x], total_calls = nrow(Y), above_threshold_calls = above_threshold_calls, prop_above_calls, n_individuals = diagnostics$n_indiv[diagnostics$group == x]))
})
# calls per individual and group
calls_by_indiv_group <- aggregate(sound.files ~ group + pred_indiv, agg_pred, length)
names(calls_by_indiv_group) <- c("Group", "Individual_ID", "Number_of_calls")
calls_by_indiv_group <- calls_by_indiv_group[order(calls_by_indiv_group$Group), ]
write.csv(calls_by_indiv_group, "./data/processed/calls_by_indiv_group.csv", row.names = FALSE)
summary_by_group <- do.call(rbind, summary_by_group_list)
groups_by_cat <- aggregate(group ~ experiment, summary_by_group, FUN = function(x) length(unique(x)))
#print pretty table
df3 <- knitr::kable(groups_by_cat, row.names = FALSE, escape = FALSE, format = "html", digits = 2)
kable_styling(df3, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 18)
experiment | group |
---|---|
mixed | 33 |
regular | 26 |
Group count by number of individuals and experiment
groups_by_cat_n <- as.data.frame.matrix(table(summary_by_group$experiment, summary_by_group$n_individuals))
#print pretty table
df4 <- knitr::kable(groups_by_cat_n, row.names = TRUE, escape = FALSE, format = "html", digits = 2)
kable_styling(df4, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 18)
2 | 3 | 4 | 5 | 6 | 7 | |
---|---|---|---|---|---|---|
mixed | 12 | 10 | 7 | 4 | 0 | 0 |
regular | 2 | 7 | 8 | 5 | 2 | 2 |
# get group and solo call counts per individual, call rate in call/min
call_rate_list <- pblapply(1:nrow(diagnostics), function(x){
Y <- acous_param_l[[diagnostics$group[x]]]
Y$sound.files <- sapply(Y$sound.files, function(x) strsplit(x, ".wav")[[1]][1])
Y$year.audio <- sapply(Y$sound.files, function(x) sub_clls$year.audio[gsub(".wav", "", sub_clls$org.sound.files) == x][1])
indivs_in_group <- strsplit(Y$indiv[Y$year.audio == diagnostics$group_flight_files[x]], split = "\\|")[[1]]
calls_per_sound_file_list <- lapply(na.omit(c(indivs_in_group, "group")), function(y){
if (y != "group"){
tab <- table(Y$year.audio[Y$indiv == y])
if (length(tab) == 0) {tab <- 0
names(tab) <- y}
} else{
tab <- sum(Y$year.audio == diagnostics$group_flight_files[x], na.rm = TRUE)
names(tab) <- diagnostics$group_flight_files[x]
}
df <- data.frame(group = diagnostics$group[x], experiment = diagnostics$experiment[x], year.audio = names(tab), indiv = y, calls = as.vector(tab), flight.time = sapply(names(tab), USE.NAMES = FALSE, function(x) (round(as.numeric(metadat$Tiempo.de.vuelo[metadat$year.audio == x][1]) * 60 / 0.04166667))), file.duration = sapply(names(tab), function(x) (sub_clls$file.duration[sub_clls$year.audio == x][1])))
return(df)
}
)
calls_per_sound_file <- do.call(rbind, calls_per_sound_file_list)
calls_per_sound_file$rate <- calls_per_sound_file$calls / calls_per_sound_file$file.duration * 60 # calls
return(calls_per_sound_file)
})
call_rate_by_group <- do.call(rbind, call_rate_list)
rownames(call_rate_by_group) <- 1:nrow(call_rate_by_group)
# add call rate to those missing
call_rate_by_group$rate <- ifelse(!is.na(call_rate_by_group$rate), call_rate_by_group$rate, call_rate_by_group$calls / call_rate_by_group$flight.time)
# add sex
call_rate_by_group$sex <- sapply(call_rate_by_group$indiv, function(x) if(x != "group") na.exclude(caps$Sexo[caps$Murci == x])[1] else NA, USE.NAMES = FALSE)
call_rate_by_group$sex <- ifelse(call_rate_by_group$sex == "m", "Male", "Female")
# add age
call_rate_by_group$age <- sapply(call_rate_by_group$indiv, function(x) if(x != "group") na.exclude(caps$Edad[caps$Murci == x])[1] else NA, USE.NAMES = FALSE)
call_rate_by_group$age <- ifelse(call_rate_by_group$age == "sa", "Sub-adult", "Adult")
# reproductive stage
call_rate_by_group$reprod.stg <- sapply(call_rate_by_group$indiv, function(x) if(x != "group") na.exclude(caps$`Estado reproductivo`[caps$Murci == x])[1] else NA, USE.NAMES = FALSE)
call_rate_by_group$reprod.stg[call_rate_by_group$reprod.stg == "p?"] <- "Pregnant"
call_rate_by_group$reprod.stg[call_rate_by_group$reprod.stg == "ne"] <- "Inactive"
call_rate_by_group$group.size <-sapply(call_rate_by_group$group, function(x) diagnostics$n_indiv[diagnostics$group == x])
# order levels
call_rate_by_group$experiment <- factor(call_rate_by_group$experiment, levels = c("regular", "mixed"))
call_rate_by_group$type <- factor(ifelse(call_rate_by_group$indiv == "group", "group", "solo"), levels = c("solo", "group"))
saveRDS(call_rate_by_group, "./data/processed/call_rate_by_group.RDS")
# get group and solo call counts per individual, call rate in call/min
call_rate_indiv_list <- pblapply(1:nrow(diagnostics), function(x){
# print(x)
Y <- acous_param_l[[diagnostics$group[x]]]
Y$pred.id <- NA
# add id for group flights
Y$pred.id[Y$experiment != "vuelo solo"] <- sapply(Y$sound.files[Y$experiment != "vuelo solo"], function(x) agg_pred$pred_indiv[agg_pred$sound.files == x])
Y$sound.files <- sapply(Y$sound.files, function(x) strsplit(x, ".wav")[[1]][1])
Y$year.audio <- sapply(Y$sound.files, function(x) sub_clls$year.audio[gsub(".wav", "", sub_clls$org.sound.files) == x][1])
indivs_in_group <- na.omit(strsplit(Y$indiv[Y$year.audio == diagnostics$group_flight_files[x]], split = "\\|")[[1]])
rate_indiv_list <- lapply(indivs_in_group, function(y){
# print(y)
year.audio.group <- na.omit(unique(Y$year.audio[Y$experiment != "vuelo solo"]))
year.audio.solo <- na.omit(unique(Y$year.audio[Y$indiv == y & Y$experiment == "vuelo solo"]))
flight.time.group <- round(as.numeric(metadat$Tiempo.de.vuelo[metadat$year.audio == year.audio.group][1]) * 60 / 0.04166667)
flight.time.solo <- sapply(year.audio.solo, function(w) round(as.numeric(metadat$Tiempo.de.vuelo[metadat$year.audio == w][1]) * 60 / 0.04166667)
)
file.duration.group <- sub_clls$file.duration[sub_clls$year.audio == year.audio.group][1]
file.duration.solo <- sapply(year.audio.solo, function(w) sub_clls$file.duration[sub_clls$year.audio == w][1])
calls.solo <- sapply(year.audio.solo, function(w) sum(Y$year.audio == w))
calls.group <- sum(Y$year.audio == year.audio.group & Y$pred.id == y)
df <- data.frame(indiv = y, group = diagnostics$group[x], size = length(indivs_in_group), experiment = diagnostics$experiment[x], type = c("group", "solo"), year.audio = c(year.audio.group, paste(year.audio.solo, collapse = "/")), calls = c(calls.group, sum(calls.solo)), flight.time = c(flight.time.group, sum(flight.time.solo)), file.duration = c(file.duration.group, sum(file.duration.solo)))
return(df)
}
)
rate_indiv <- do.call(rbind, rate_indiv_list)
rate_indiv$rate <- rate_indiv$calls / rate_indiv$file.duration * 60 # calls
return(rate_indiv)
})
call_rate_indiv <- do.call(rbind, call_rate_indiv_list)
rownames(call_rate_indiv) <- 1:nrow(call_rate_indiv)
# add call rate to those missing
call_rate_indiv$rate <- ifelse(!is.na(call_rate_indiv$rate), call_rate_indiv$rate, call_rate_indiv$calls / call_rate_indiv$flight.time)
# add sex
call_rate_indiv$sex <- sapply(call_rate_indiv$indiv, function(x) if(x != "group") na.exclude(caps$Sexo[caps$Murci == x])[1] else NA, USE.NAMES = FALSE)
call_rate_indiv$sex <- ifelse(call_rate_indiv$sex == "m", "Male", "Female")
# add age
call_rate_indiv$age <- sapply(call_rate_indiv$indiv, function(x) if(x != "group") na.exclude(caps$Edad[caps$Murci == x])[1] else NA, USE.NAMES = FALSE)
call_rate_indiv$age <- ifelse(call_rate_indiv$age == "sa", "Sub-adult", "Adult")
# reproductive stage
call_rate_indiv$reprod.stg <- sapply(call_rate_indiv$indiv, function(x) if(x != "group") na.exclude(caps$`Estado reproductivo`[caps$Murci == x])[1] else NA, USE.NAMES = FALSE)
call_rate_indiv$reprod.stg[call_rate_indiv$reprod.stg == "p?"] <- "Pregnant"
call_rate_indiv$reprod.stg[call_rate_indiv$reprod.stg == "ne"] <- "Inactive"
call_rate_indiv$group.size <-sapply(call_rate_indiv$group, function(x) diagnostics$n_indiv[diagnostics$group == x])
# order levels
call_rate_indiv$experiment <- factor(call_rate_indiv$experiment, levels = c("regular", "mixed"))
call_rate_indiv$type <- factor(ifelse(call_rate_indiv$type == "group", "group", "solo"), levels = c("solo", "group"))
saveRDS(call_rate_indiv, "./data/processed/call_rate_by_individual.RDS")
call_rate_by_group <- readRDS("./data/processed/call_rate_by_group.RDS")
call_rate_indiv <- readRDS("./data/processed/call_rate_by_individual.RDS")
call_rate_by_group <- call_rate_by_group[call_rate_by_group$type != "solo", ]
call_rate_by_group$type <- "overall.group"
call_rate_indiv$type <- as.character(call_rate_indiv$type)
call_rate_indiv$type[call_rate_indiv$type == "group"] <- "indiv.group"
call_rate <- rbind(call_rate_by_group[, intersect(names(call_rate_by_group), names(call_rate_indiv))
], call_rate_indiv[, intersect(names(call_rate_by_group), names(call_rate_indiv))
])
# aggregate for plot
agg_rate <- aggregate(rate ~ experiment + type + group.size, data = call_rate, FUN = mean)
agg_rate$sd <- aggregate(rate ~ experiment + type + group.size, data = call_rate, FUN = sd)$rate
agg_rate$type <- factor(agg_rate$type, levels = c("solo", "indiv.group", "overall.group"))
# ggplot(agg_rate[agg_rate$group.size < 6, ], aes(x = type, y = rate)) +
# geom_point(color = viridis(10)[3], size = 2) +
# geom_errorbar(aes(ymin = rate - sd, ymax = rate + sd), color = viridis(10)[3], width=.1, lwd = 1.1) +
# geom_hline(yintercept = mean(call_rate$rate[call_rate$type == "solo"], na.rm = TRUE), lty = 2, alpha = 0.5) +
# labs(x = "Flight", y = "Call rate (calls / min)") +
# facet_grid(group.size ~ experiment) +
# theme(axis.text.x = element_text(angle = 45, hjust = 1))
# aggregate for plot
agg_rate_indiv <- aggregate(rate ~ experiment + type + group.size + indiv, data = call_rate_indiv, FUN = mean)
agg_rate_indiv$sd <- aggregate(rate ~ experiment + type + group.size + indiv, data = call_rate_indiv, FUN = sd)$rate
agg_rate_indiv <- agg_rate_indiv[agg_rate_indiv$indiv != "group", ]
mixed_ids <- unique(agg_rate_indiv$indiv[agg_rate_indiv$experiment == "mixed"])
mixed_l <- lapply(mixed_ids, function(x) {
X <- agg_rate_indiv[agg_rate_indiv$indiv == x, ]
X <- X[X$experiment == "mixed" | X$type == "solo", ]
if (sum(X$experiment == "mixed") > 1)
X <- X[X$group.size == max(X$group.size[X$experiment == "mixed"]) | X$type == "solo",]
if (nrow(X) == 1)
X <- X[vector(), ]
return(X)
})
mixed <- do.call(rbind, mixed_l)
mixed$experiment <- "mixed"
# regular
regular_ids <- unique(agg_rate_indiv$indiv[agg_rate_indiv$experiment == "regular"])
regular_l <- lapply(regular_ids, function(x) {
X <- agg_rate_indiv[agg_rate_indiv$indiv == x, ]
X <- X[X$experiment == "regular" | X$type == "solo", ]
if (sum(X$experiment == "regular") > 1)
X <- X[X$group.size == max(X$group.size[X$experiment == "regular"]) | X$type == "solo",]
if (nrow(X) == 1)
X <- X[vector(), ]
return(X)
})
regular <- do.call(rbind, regular_l)
regular$experiment <- "regular"
sub_agg <- rbind(mixed, regular)
sub_agg$experiment_f <- factor(ifelse(sub_agg$experiment == "regular", "Regular", "Artificial"), levels = c("Regular", "Artificial"))
sub_agg$type_f <- factor(ifelse(sub_agg$type == "solo", "Solo", "In group"), levels = c("Solo", "In group"))
# individual plot solo vs individual in group
ggplot(sub_agg, aes(x = type_f, y = rate, group = indiv, color = group.size)) +
geom_line() +
geom_point(size = 2) +
scale_color_viridis_c(alpha = 0.8) +
labs(x = "Flight", y = "Call rate (calls/s)", color = "Group\nsize") +
facet_grid( ~ experiment_f) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
call_rate_indiv <- call_rate_indiv[!is.na(call_rate_indiv$rate) & call_rate_indiv$rate > 0, ]
call_rate_indiv$experiment.type <- ifelse(call_rate_indiv$type == "solo", "solo", paste(call_rate_indiv$experiment, call_rate_indiv$type, sep = "-"))
call_rate_indiv$experiment.type <- gsub("mixed-indiv.group", "artificial.group", call_rate_indiv$experiment.type)
call_rate_indiv$experiment.type <- gsub("regular-indiv.group", "real.group", call_rate_indiv$experiment.type)
call_rate_indiv$experiment.type <- factor(call_rate_indiv$experiment.type, levels = c("solo", "real.group", "artificial.group"))
# mean centering group size
call_rate_indiv$group.size <- call_rate_indiv$group.size - mean(call_rate_indiv$group.size)
chains <- 3
iter <- 5000
model_formulas <- c("rate ~ experiment.type + (1 | indiv)", "rate ~ experiment.type + experiment.type:group.size + (1 | indiv)", "rate ~ 1 + (1 | indiv)", "rate ~ experiment.type + (experiment.type | indiv)", "rate ~ experiment.type + experiment.type:group.size + (experiment.type | indiv)", "rate ~ 1 + (experiment.type | indiv)")
brms_models <- lapply(model_formulas, function(x){
mod <- brm(
formula = x,
iter = iter,
thin = 1,
data = call_rate_indiv,
family = lognormal(),
silent = 2,
chains = chains,
cores = chains
)
mod <- add_criterion(mod, c("loo"))
return(mod)
})
names(brms_models) <- model_formulas
saveRDS(brms_models, "./data/processed/individual_call_rate_solo_vs_group_models.RDS")
brms_models <- readRDS("./data/processed/individual_call_rate_solo_vs_group_models.RDS")
cat(paste(length(brms_models),"models evaluated:\n"))
6 models evaluated:
for(i in names(brms_models))
cat(paste("- ", i, "\n"))
comp_mods <- loo_compare(brms_models[[1]], brms_models[[2]], brms_models[[3]], brms_models[[4]], brms_models[[5]], brms_models[[6]], model_names = names(brms_models))
df1 <- kbl(comp_mods, row.names = TRUE, escape = FALSE, format = "html", digits = 3)
cat("Compare models:")
Compare models:
kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)
elpd_diff | se_diff | elpd_loo | se_elpd_loo | p_loo | se_p_loo | looic | se_looic | |
---|---|---|---|---|---|---|---|---|
rate ~ experiment.type + experiment.type:group.size + (experiment.type | indiv) | 0.000 | 0.000 | -939.554 | 27.123 | 157.050 | 13.005 | 1879.107 | 54.247 |
rate ~ experiment.type + (experiment.type | indiv) | -21.833 | 9.543 | -961.387 | 25.243 | 139.563 | 11.566 | 1922.774 | 50.485 |
rate ~ 1 + (experiment.type | indiv) | -33.184 | 10.526 | -972.738 | 25.187 | 154.243 | 12.086 | 1945.476 | 50.373 |
rate ~ experiment.type + experiment.type:group.size + (1 | indiv) | -52.018 | 14.293 | -991.572 | 23.324 | 54.766 | 5.006 | 1983.144 | 46.649 |
rate ~ experiment.type + (1 | indiv) | -53.907 | 13.988 | -993.461 | 22.753 | 51.529 | 4.153 | 1986.921 | 45.506 |
rate ~ 1 + (1 | indiv) | -128.646 | 18.295 | -1068.199 | 19.856 | 20.676 | 1.788 | 2136.399 | 39.712 |
cat("Best model:\n")
Best model:
cat(paste("- ", rownames(comp_mods)[1], "\n"))
# best model
if (!grepl("1 +", rownames(comp_mods)[1], fixed = TRUE))
summary_brm_model(brms_models[[rownames(comp_mods)[1]]], gsub.pattern = "experiment.type", gsub.replacement = "solo_vs_")
Estimate | Rhat | Bulk_ESS | iterations | chains | CI_low | CI_high | |
---|---|---|---|---|---|---|---|
solo_vs_real.group | -1.279 | 1 | 1096.061 | 5000 | 1 | -1.556 | -0.995 |
solo_vs_artificial.group | -1.613 | 1 | 1272.381 | 5000 | 1 | -2.049 | -1.193 |
solo_vs_solo:group.size | -0.021 | 1 | 2356.684 | 5000 | 1 | -0.105 | 0.066 |
solo_vs_real.group:group.size | -0.160 | 1.001 | 1149.756 | 5000 | 1 | -0.338 | 0.016 |
solo_vs_artificial.group:group.size | -0.539 | 1.001 | 1902.582 | 5000 | 1 | -0.774 | -0.308 |
call_rate_by_group <- readRDS("./data/processed/call_rate_by_group.RDS")
call_rate_by_group$experiment.type <- ifelse(call_rate_by_group$type == "solo", "solo", paste(call_rate_by_group$experiment, call_rate_by_group$type, sep = "-"))
call_rate_by_group$experiment.type <- gsub("mixed.group", "artificial.group", call_rate_by_group$experiment.type)
call_rate_by_group$experiment.type <- gsub("regular.group", "real.group", call_rate_by_group$experiment.type)
call_rate_by_group$experiment.type <- factor(call_rate_by_group$experiment.type, levels = c("solo", "real.group", "artificial.group"))
# aggregate for plot
agg_rate_group <- aggregate(rate ~ experiment + type + group.size + group, data = call_rate_by_group, FUN = mean)
agg_rate_group$sd <- aggregate(rate ~ experiment + type + group.size + group, data = call_rate_by_group, FUN = sd)$rate
agg_rate_group <- agg_rate_group[agg_rate_group$type != "indiv.group", ]
mixed_ids <- unique(agg_rate_group$group[agg_rate_group$experiment == "mixed"])
mixed_l <- lapply(mixed_ids, function(x) {
X <- agg_rate_group[agg_rate_group$group == x, ]
X <- X[X$experiment == "mixed" | X$type == "solo", ]
if (sum(X$experiment == "mixed") > 1)
X <- X[X$group.size == max(X$group.size[X$experiment == "mixed"]) | X$type == "solo",]
if (nrow(X) == 1)
X <- X[vector(), ]
return(X)
})
mixed <- do.call(rbind, mixed_l)
mixed$experiment <- "mixed"
# regular
regular_ids <- unique(agg_rate_group$group[agg_rate_group$experiment == "regular"])
regular_l <- lapply(regular_ids, function(x) {
X <- agg_rate_group[agg_rate_group$group == x, ]
X <- X[X$experiment == "regular" | X$type == "solo", ]
if (sum(X$experiment == "regular") > 1)
X <- X[X$group.size == max(X$group.size[X$experiment == "regular"]) | X$type == "solo",]
if (nrow(X) == 1)
X <- X[vector(), ]
return(X)
})
regular <- do.call(rbind, regular_l)
regular$experiment <- "regular"
sub_agg <- rbind(mixed, regular)
sub_agg$experiment_f <- factor(ifelse(sub_agg$experiment == "regular", "Regular", "Artificial"), levels = c("Regular", "Artificial"))
sub_agg$type_f <- factor(ifelse(sub_agg$type == "solo", "Solo", "Overall group"), levels = c("Solo", "Overall group"))
# individual plot solo vs individual in group
ggplot(sub_agg, aes(x = type_f, y = rate, group = group, color = group.size)) +
geom_line() +
geom_point(size = 2) +
scale_color_viridis_c(alpha = 0.8) +
labs(x = "Flight", y = "Call rate (calls/s)", color = "Group\nsize") +
facet_grid( ~ experiment_f) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# in the same scale than he one above
# ggplot(sub_agg, aes(x = type_f, y = rate, group = group, color = group.size)) +
# geom_line() +
# geom_point(size = 2) +
# scale_color_viridis_c(alpha = 0.8) +
# labs(x = "Flight", y = "Call rate (calls/s)", color = "Group\nsize") +
# facet_grid( ~ experiment_f) +
# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
# ylim(c(0, 210))
call_rate_by_group <- readRDS("./data/processed/call_rate_by_group.RDS")
call_rate_by_group$experiment.type <- ifelse(call_rate_by_group$type == "solo", "solo", paste(call_rate_by_group$experiment, call_rate_by_group$type, sep = "-"))
call_rate_by_group$experiment.type <- gsub("mixed.group", "artificial.group", call_rate_by_group$experiment.type)
call_rate_by_group$experiment.type <- gsub("regular.group", "real.group", call_rate_by_group$experiment.type)
call_rate_by_group$experiment.type <- factor(call_rate_by_group$experiment.type, levels = c("solo", "real.group", "artificial.group"))
# mean centering group size
call_rate_by_group$group.size <- call_rate_by_group$group.size - mean(call_rate_by_group$group.size)
chains <- 3
iter <- 5000
model_formulas <- c("rate ~ experiment.type + (1 | group)", "rate ~ experiment.type + experiment.type:group.size + (1 | group)", "rate ~ 1 + (1 | group)")
brms_models <- lapply(model_formulas, function(x){
mod <- brm(
formula = x,
iter = iter,
thin = 1,
data = call_rate_by_group,
family = lognormal(),
silent = 2,
chains = chains,
cores = chains
)
mod <- add_criterion(mod, c("loo"))
return(mod)
})
names(brms_models) <- model_formulas
saveRDS(brms_models, "./data/processed/individual_vs_group_call_rate_models.RDS")
brms_models <- readRDS("./data/processed/individual_vs_group_call_rate_models.RDS")
cat(paste(length(brms_models), "models evaluated:\n"))
3 models evaluated:
for(i in names(brms_models))
cat(paste("- ", i, "\n"))
comp_mods <- loo_compare(brms_models[[1]], brms_models[[2]], brms_models[[3]], model_names = names(brms_models))
df1 <- kbl(comp_mods, row.names = TRUE, escape = FALSE, format = "html", digits = 3)
cat("Compare models:")
Compare models:
kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)
elpd_diff | se_diff | elpd_loo | se_elpd_loo | p_loo | se_p_loo | looic | se_looic | |
---|---|---|---|---|---|---|---|---|
rate ~ 1 + (1 | group) | 0.000 | 0.000 | -1445.212 | 23.248 | 30.238 | 2.821 | 2890.425 | 46.496 |
rate ~ experiment.type + (1 | group) | -0.317 | 1.977 | -1445.529 | 23.032 | 32.180 | 2.963 | 2891.059 | 46.065 |
rate ~ experiment.type + experiment.type:group.size + (1 | group) | -3.861 | 2.526 | -1449.073 | 23.029 | 35.729 | 3.337 | 2898.147 | 46.058 |
# cat("Best model:\n")
# cat(paste("- ", rownames(comp_mods)[1], "\n"))
#
# # best model
# if (!grepl("1 +", rownames(comp_mods)[1], fixed = TRUE))
# summary_brm_model(brms_models[[rownames(comp_mods)[1]]], gsub.pattern = "experiment.type", gsub.replacement = "solo_vs_")
The best model did not outperforme the null (intercept only) model
seltab <- attributes(clls)$check.results
seltab$year.audio <- clls$year.audio
seltab <- seltab[!is.na(seltab$sound.files), ]
seltab$type <- sapply(seltab$year.audio, function(x) metadat$Experimento[metadat$year.audio == x][1])
seltab$group <- sapply(seltab$year.audio, function(x) metadat$Grupo[metadat$year.audio == x][1])
seltab <- seltab[!is.na(seltab$group), ]
seltab$type <-ifelse(grepl("solo", seltab$type), "solo", "group")
seltab$indiv <- sapply(1:nrow(seltab), function(x) if (seltab$type[x] == "solo") metadat$Individuo[metadat$year.audio == seltab$year.audio[x]] else "group")
seltab$start <- seltab$orig.start
seltab$end <- seltab$orig.end
# get group and solo call counts per individual, call rate in call/min
gaps_indiv_list <- pblapply(1:nrow(diagnostics), function(x){
# print(x)
# get indivs
sound.files <- acous_param_l[[diagnostics$group[x]]]$sound.files
Y <- seltab[seltab$sound.files %in% sound.files, ]
# add id for group flights
Y$pred.id <- NA
Y$pred.id[Y$type == "group"] <- sapply(Y$sound.files[Y$type == "group"], function(x) agg_pred$pred_indiv[agg_pred$sound.files == x])
Y <- Y[order(Y$orig.sound.files, Y$start), ]
gaps_indiv_list <- lapply(unique(Y$indiv), function(y){
# print(y)
if (y != "group"){
W <- Y[Y$indiv == y, ]
W$sound.files <- W$orig.sound.files
gaps.solo <- data.frame(group = diagnostics$group[x], indiv = y, experiment = diagnostics$experiment[x], type = "solo", gaps = if (nrow(W) > 0) na.omit(gaps(W, pb = FALSE)$gaps) else NA)
Z <- Y[Y$pred.id == y & !is.na(Y$pred.id), ]
Z$sound.files <- Z$orig.sound.files
group.gaps <- if (nrow(Z) > 0) na.omit(gaps(Z, pb = FALSE)$gaps) else NA
gaps.group <- data.frame(group = diagnostics$group[x], indiv = y, experiment = diagnostics$experiment[x], type = "indiv.group", gaps = if (length(group.gaps) > 0) group.gaps else NA)
gaps_data <- rbind(gaps.group, gaps.solo)
} else {
Q <- Y[Y$type == "group", ]
Q$sound.files <- Q$orig.sound.files
gaps_data <- data.frame(group = diagnostics$group[x], indiv = y, experiment = diagnostics$experiment[x], type = "overall.group", gaps = na.omit(gaps(Q, pb = FALSE)$gaps))
return(gaps_data)
}
})
gaps_indiv <- do.call(rbind, gaps_indiv_list)
gaps_indiv$group.size <- diagnostics$n_indiv[x]
return(gaps_indiv)
}
)
gaps_indiv <- do.call(rbind, gaps_indiv_list)
# add sex
gaps_indiv$sex <- sapply(gaps_indiv$indiv, function(x) if(x != "group") na.exclude(caps$Sexo[caps$Murci == x])[1] else NA, USE.NAMES = FALSE)
gaps_indiv$sex <- ifelse(gaps_indiv$sex == "m", "Male", "Female")
# add age
gaps_indiv$age <- sapply(gaps_indiv$indiv, function(x) if(x != "group") na.exclude(caps$Edad[caps$Murci == x])[1] else NA, USE.NAMES = FALSE)
gaps_indiv$age <- ifelse(gaps_indiv$age == "sa", "Sub-adult", "Adult")
# reproductive stage
gaps_indiv$reprod.stg <- sapply(gaps_indiv$indiv, function(x) if(x != "group") na.exclude(caps$`Estado reproductivo`[caps$Murci == x])[1] else NA, USE.NAMES = FALSE)
gaps_indiv$reprod.stg[gaps_indiv$reprod.stg == "p?"] <- "Pregnant"
gaps_indiv$reprod.stg[gaps_indiv$reprod.stg == "ne"] <- "Inactive"
gaps_indiv$group.size <-sapply(gaps_indiv$group, function(x) diagnostics$n_indiv[diagnostics$group == x])
# order levels
gaps_indiv$experiment <- factor(gaps_indiv$experiment, levels = c("Regular", "Artificial"))
gaps_indiv$experiment.type <- ifelse(gaps_indiv$type == "solo", "solo", paste(gaps_indiv$experiment, gaps_indiv$type, sep = "-"))
# #find individuals with repeated solo data
# indivs_2_sets <- sapply(unique(gaps_indiv$indiv), function(x) length(unique(gaps_indiv$group[gaps_indiv$indiv == x & gaps_indiv$type == "solo"])))
#
# # remove those
# indivs_2_sets <- names(indivs_2_sets)[indivs_2_sets > 1]
# indivs_2_sets <- indivs_2_sets[indivs_2_sets != "group"]
#
# gaps_indiv_filter_list <- lapply(unique(gaps_indiv$indiv), function(x){
#
# Y <- gaps_indiv[gaps_indiv$indiv == x, ]
#
# if(x %in% indivs_2_sets){
# Y <- Y[Y$group == unique(Y$group)[1] & Y$type == "solo" | Y$type == "indiv.group",]
#
# }
#
# return(Y)
#
# })
# gaps_indiv_filter <- do.call(rbind, gaps_indiv_filter_list)
#
# # all have 1
# all(sapply(unique(gaps_indiv_filter$indiv), function(x) length(unique(gaps_indiv_filter$group[gaps_indiv_filter$indiv == x & gaps_indiv_filter$type == "solo"]))) <= 1)
saveRDS(gaps_indiv, "./data/processed/gaps_by_individual.RDS")
gaps_indiv <- readRDS("./data/processed/gaps_by_individual.RDS")
# aggregate for plot
agg_gaps <- aggregate(gaps ~ experiment + type + group.size, data = gaps_indiv, FUN = mean)
agg_gaps$sd <- aggregate(gaps ~ experiment + type + group.size, data = gaps_indiv, FUN = sd)$gaps
# ggplot(agg_gaps[agg_gaps$group.size < 6, ], aes(x = type, y = gaps)) +
# geom_point(color = viridis(10)[3], size = 2) +
# geom_errorbar(aes(ymin = gaps - sd, ymax = gaps + sd), color = viridis(10)[3], width=.1, lwd = 1.1) +
# geom_hline(yintercept = mean(gaps_indiv$gaps[gaps_indiv$type == "solo"], na.rm = TRUE), lty = 2, alpha = 0.5) +
# labs(x = "Flight", y = "Gap duration (s)") +
# facet_grid(group.size ~ experiment) +
# theme(axis.text.x = element_text(angle = 45, hjust = 1))
# aggregate for plot
agg_gaps_indiv <- aggregate(gaps ~ experiment + type + group.size + indiv, data = gaps_indiv, FUN = mean)
agg_gaps_indiv$sd <- aggregate(gaps ~ experiment + type + group.size + indiv, data = gaps_indiv, FUN = sd)$gaps
agg_gaps_indiv <- agg_gaps_indiv[agg_gaps_indiv$indiv != "group", ]
mixed_ids <- unique(agg_gaps_indiv$indiv[agg_gaps_indiv$experiment == "mixed"])
mixed_l <- lapply(mixed_ids, function(x) {
X <- agg_gaps_indiv[agg_gaps_indiv$indiv == x, ]
X <- X[X$experiment == "mixed" | X$type == "solo", ]
if (sum(X$experiment == "mixed") > 1)
X <- X[X$group.size == max(X$group.size[X$experiment == "mixed"]) | X$type == "solo",]
if (nrow(X) == 1)
X <- X[vector(), ]
return(X)
})
mixed <- do.call(rbind, mixed_l)
mixed$experiment <- "mixed"
# regular
regular_ids <- unique(agg_gaps_indiv$indiv[agg_gaps_indiv$experiment == "regular"])
regular_l <- lapply(regular_ids, function(x) {
X <- agg_gaps_indiv[agg_gaps_indiv$indiv == x, ]
X <- X[X$experiment == "regular" | X$type == "solo", ]
if (sum(X$experiment == "regular") > 1)
X <- X[X$group.size == max(X$group.size[X$experiment == "regular"]) | X$type == "solo",]
if (nrow(X) == 1)
X <- X[vector(), ]
return(X)
})
regular <- do.call(rbind, regular_l)
regular$experiment <- "regular"
sub_agg <- rbind(mixed, regular)
sub_agg$experiment_f <- factor(ifelse(sub_agg$experiment == "regular", "Regular", "Artificial"), levels = c("Regular", "Artificial"))
sub_agg$type_f <- factor(ifelse(sub_agg$type == "solo", "Solo", "In group"), levels = c("Solo", "In group"))
# individual plot solo vs individual in group
ggplot(sub_agg, aes(x = type_f, y = gaps, group = indiv, color = group.size)) +
geom_line() +
geom_point(size = 2) +
scale_color_viridis_c(alpha = 0.8) +
labs(x = "Flight", y = "Gap duration (s)", color = "Group\nsize") +
facet_grid( ~ experiment_f) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
chains <- 3
iter <- 10000
sub_gaps_indiv <- gaps_indiv[gaps_indiv$type != "overall.group", ]
sub_gaps_indiv$experiment.type <- gsub("-indiv.group", "", sub_gaps_indiv$experiment.type)
sub_gaps_indiv$experiment.type <- gsub("mixed", "artificial.group", sub_gaps_indiv$experiment.type)
sub_gaps_indiv$experiment.type <- gsub("regular", "real.group", sub_gaps_indiv$experiment.type)
sub_gaps_indiv$experiment.type <- factor(sub_gaps_indiv$experiment.type, levels = c("solo", "real.group", "artificial.group"))
# mean centering group size
sub_gaps_indiv$group.size <- sub_gaps_indiv$group.size - mean(sub_gaps_indiv$group.size)
model_formulas <- c("gaps ~ experiment.type + (1 | indiv)", "gaps ~ experiment.type + experiment.type:group.size + (1 | indiv)", "gaps ~ 1 + (1 | indiv)", "gaps ~ experiment.type + (experiment.type | indiv)", "gaps ~ experiment.type + experiment.type:group.size + (experiment.type | indiv)", "gaps ~ 1 + (experiment.type | indiv)")
brms_models <- lapply(model_formulas, function(x){
mod <- brm(
formula = x,
iter = iter,
thin = 1,
data = sub_gaps_indiv,
family = lognormal(),
silent = 2,
chains = chains,
cores = chains
)
mod <- add_criterion(mod, c("loo"))
return(mod)
})
names(brms_models) <- model_formulas
saveRDS(brms_models, "./data/processed/individual_gaps_solo_vs_group_models.RDS")
brms_models <- readRDS("./data/processed/individual_gaps_solo_vs_group_models.RDS")
cat(paste(length(brms_models),"models evaluated:\n"))
6 models evaluated:
for(i in names(brms_models))
cat(paste("- ", i, "\n"))
comp_mods <- loo_compare(brms_models[[1]], brms_models[[2]], brms_models[[3]], brms_models[[4]], brms_models[[5]], brms_models[[6]], model_names = names(brms_models))
df1 <- kbl(comp_mods, row.names = TRUE, escape = FALSE, format = "html", digits = 3)
cat("Compare models:")
Compare models:
kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)
elpd_diff | se_diff | elpd_loo | se_elpd_loo | p_loo | se_p_loo | looic | se_looic | |
---|---|---|---|---|---|---|---|---|
gaps ~ experiment.type + experiment.type:group.size + (experiment.type | indiv) | 0.000 | 0.000 | -23451.41 | 133.836 | 281.298 | 12.345 | 46902.82 | 267.672 |
gaps ~ experiment.type + (experiment.type | indiv) | -20.900 | 6.792 | -23472.31 | 134.025 | 278.011 | 12.027 | 46944.62 | 268.050 |
gaps ~ 1 + (experiment.type | indiv) | -27.682 | 7.675 | -23479.09 | 134.576 | 291.048 | 13.007 | 46958.19 | 269.153 |
gaps ~ experiment.type + experiment.type:group.size + (1 | indiv) | -510.299 | 39.178 | -23961.71 | 132.239 | 132.204 | 5.145 | 47923.42 | 264.478 |
gaps ~ experiment.type + (1 | indiv) | -532.517 | 39.773 | -23983.93 | 132.260 | 128.189 | 5.083 | 47967.86 | 264.520 |
gaps ~ 1 + (1 | indiv) | -711.788 | 47.312 | -24163.20 | 136.220 | 126.193 | 4.937 | 48326.40 | 272.440 |
cat("Best model:\n")
Best model:
cat(paste("- ", rownames(comp_mods)[1], "\n"))
# best model
if (!grepl("1 +", rownames(comp_mods)[1], fixed = TRUE))
summary_brm_model(brms_models[[rownames(comp_mods)[1]]], gsub.pattern = "experiment.type", gsub.replacement = "solo_vs_")
Estimate | Rhat | Bulk_ESS | iterations | chains | CI_low | CI_high | |
---|---|---|---|---|---|---|---|
solo_vs_real.group | 0.638 | 1 | 598.625 | 5000 | 1 | 0.428 | 0.860 |
solo_vs_artificial.group | 0.945 | 1.005 | 693.141 | 5000 | 1 | 0.726 | 1.182 |
solo_vs_solo:group.size | 0.027 | 1 | 817.091 | 5000 | 1 | -0.019 | 0.075 |
solo_vs_real.group:group.size | 0.186 | 1.001 | 777.128 | 5000 | 1 | 0.043 | 0.327 |
solo_vs_artificial.group:group.size | 0.251 | 1 | 3243.662 | 5000 | 1 | 0.178 | 0.323 |
# contrasts
cat("Compare artificial vs real group gaps:\n")
Compare artificial vs real group gaps:
contrasts <- c(one = "experiment.typeartificial.group - experiment.typereal.group = 0")
hypothesis(brms_models[[rownames(comp_mods)[1]]], contrasts)
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 one 0.31 0.15 0.01 0.61 NA NA *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
brms_models[[rownames(comp_mods)[1]]]
chains <- 3
iter <- 10000
sub_gaps_indiv <- gaps_indiv[gaps_indiv$type != "overall.group", ]
sub_gaps_indiv$experiment.type <- gsub("-indiv.group", "", sub_gaps_indiv$experiment.type)
sub_gaps_indiv$experiment.type <- gsub("mixed", "artificial.group", sub_gaps_indiv$experiment.type)
sub_gaps_indiv$experiment.type <- gsub("regular", "real.group", sub_gaps_indiv$experiment.type)
sub_gaps_indiv$experiment.type <- factor(sub_gaps_indiv$experiment.type, levels = c("solo", "real.group", "artificial.group"))
# mean centering group size
sub_gaps_indiv$group.size <- sub_gaps_indiv$group.size - mean(sub_gaps_indiv$group.size)
model_formulas <- c("gaps ~ experiment.type + (experiment.type | indiv)", "gaps ~ experiment.type + experiment.type:group.size + (experiment.type | indiv)", "gaps ~ experiment.type + group.size + (experiment.type | indiv)", "gaps ~ 1 + (experiment.type | indiv)")
brms_models <- lapply(model_formulas, function(x){
mod <- brm(
formula = x,
iter = iter,
thin = 1,
data = sub_gaps_indiv,
family = lognormal(),
silent = 2,
chains = chains,
cores = chains
)
mod <- add_criterion(mod, c("loo"))
return(mod)
})
names(brms_models) <- model_formulas
saveRDS(brms_models, "./data/processed/individual_gaps_solo_vs_group_random_slope_models.RDS")
brms_models <- readRDS("./data/processed/individual_gaps_solo_vs_group_random_slope_models.RDS")
cat(paste(length(brms_models),"models evaluated:\n"))
for(i in names(brms_models))
cat(paste("- ", i, "\n"))
comp_mods <- loo_compare(brms_models[[1]], brms_models[[2]], brms_models[[3]], brms_models[[4]], model_names = names(brms_models))
df1 <- kbl(comp_mods, row.names = TRUE, escape = FALSE, format = "html", digits = 3)
cat("Compare models:")
kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)
cat("Best model:\n")
cat(paste("- ", rownames(comp_mods)[1], "\n"))
# best model
if (!grepl("1 +", rownames(comp_mods)[1], fixed = TRUE))
summary_brm_model(brms_models[[rownames(comp_mods)[1]]], gsub.pattern = "experiment.type", gsub.replacement = "solo_vs_")
#
# # contrasts
cat("Compare artificial vs real group gaps:\n")
contrasts <- c(one = "experiment.typeartificial.group - experiment.typereal.group = 0")
hypothesis(brms_models[[rownames(comp_mods)[1]]], contrasts)
# aggregate for plot
agg_gaps_group <- aggregate(gaps ~ experiment + type + group.size + group, data = gaps_indiv, FUN = mean)
agg_gaps_group$sd <- aggregate(gaps ~ experiment + type + group.size + group, data = gaps_indiv, FUN = sd)$gaps
agg_gaps_group <- agg_gaps_group[agg_gaps_group$type != "indiv.group", ]
mixed_ids <- unique(agg_gaps_group$group[agg_gaps_group$experiment == "mixed"])
mixed_l <- lapply(mixed_ids, function(x) {
X <- agg_gaps_group[agg_gaps_group$group == x, ]
X <- X[X$experiment == "mixed" | X$type == "solo", ]
if (sum(X$experiment == "mixed") > 1)
X <- X[X$group.size == max(X$group.size[X$experiment == "mixed"]) | X$type == "solo",]
if (nrow(X) == 1)
X <- X[vector(), ]
return(X)
})
mixed <- do.call(rbind, mixed_l)
mixed$experiment <- "mixed"
# regular
regular_ids <- unique(agg_gaps_group$group[agg_gaps_group$experiment == "regular"])
regular_l <- lapply(regular_ids, function(x) {
X <- agg_gaps_group[agg_gaps_group$group == x, ]
X <- X[X$experiment == "regular" | X$type == "solo", ]
if (sum(X$experiment == "regular") > 1)
X <- X[X$group.size == max(X$group.size[X$experiment == "regular"]) | X$type == "solo",]
if (nrow(X) == 1)
X <- X[vector(), ]
return(X)
})
regular <- do.call(rbind, regular_l)
regular$experiment <- "regular"
sub_agg <- rbind(mixed, regular)
sub_agg$experiment_f <- factor(ifelse(sub_agg$experiment == "regular", "Regular", "Artificial"), levels = c("Regular", "Artificial"))
sub_agg$type_f <- factor(ifelse(sub_agg$type == "solo", "Solo", "Overall group"), levels = c("Solo", "Overall group"))
# individual plot solo vs individual in group
ggplot(sub_agg, aes(x = type_f, y = gaps, group = group, color = group.size)) +
geom_line() +
geom_point(size = 2) +
scale_color_viridis_c(alpha = 0.8) +
labs(x = "Flight", y = "Gap duration (s)", color = "Group\nsize") +
facet_grid( ~ experiment_f) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# in the same scale than he one above
ggplot(sub_agg, aes(x = type_f, y = gaps, group = group, color = group.size)) +
geom_line() +
geom_point(size = 2) +
scale_color_viridis_c(alpha = 0.8) +
labs(x = "Flight", y = "Gap duration (s)", color = "Group\nsize") +
facet_grid( ~ experiment_f) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
ylim(c(0, 210))
chains <- 3
iter <- 10000
sub_gaps_indiv <- gaps_indiv[gaps_indiv$type != "indiv.group", ]
sub_gaps_indiv$experiment.type <- gsub("-overall.group", "", sub_gaps_indiv$experiment.type)
sub_gaps_indiv$experiment.type <- gsub("mixed", "artificial.group", sub_gaps_indiv$experiment.type)
sub_gaps_indiv$experiment.type <- gsub("regular", "real.group", sub_gaps_indiv$experiment.type)
sub_gaps_indiv$experiment.type <- factor(sub_gaps_indiv$experiment.type, levels = c("solo", "real.group", "artificial.group"))
model_formulas <- c("gaps ~ experiment.type + (1 | group)", "gaps ~ experiment.type + experiment.type:group.size + (1 | group)", "gaps ~ 1 + (1 | group)")
brms_models <- lapply(model_formulas, function(x){
mod <- brm(
formula = x,
iter = iter,
thin = 1,
data = sub_gaps_indiv,
family = lognormal(),
silent = 2,
chains = chains,
cores = chains
)
mod <- add_criterion(mod, c("loo"))
return(mod)
})
names(brms_models) <- model_formulas
saveRDS(brms_models, "./data/processed/group_gaps_solo_vs_group_models.RDS")
brms_models <- readRDS("./data/processed/group_gaps_solo_vs_group_models.RDS")
cat(paste(length(brms_models),"models evaluated:\n"))
3 models evaluated:
for(i in names(brms_models))
cat(paste("- ", i, "\n"))
comp_mods <- loo_compare(brms_models[[1]], brms_models[[2]], brms_models[[3]], model_names = names(brms_models))
df1 <- kbl(comp_mods, row.names = TRUE, escape = FALSE, format = "html", digits = 3)
cat("Compare models:")
Compare models:
kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE, font_size = 12)
elpd_diff | se_diff | elpd_loo | se_elpd_loo | p_loo | se_p_loo | looic | se_looic | |
---|---|---|---|---|---|---|---|---|
gaps ~ experiment.type + experiment.type:group.size + (1 | group) | 0.00 | 0.000 | -25524.42 | 134.888 | 84.646 | 3.027 | 51048.84 | 269.775 |
gaps ~ experiment.type + (1 | group) | -38.19 | 10.464 | -25562.61 | 134.492 | 81.895 | 2.989 | 51125.22 | 268.985 |
gaps ~ 1 + (1 | group) | -316.28 | 28.939 | -25840.70 | 127.477 | 77.181 | 2.789 | 51681.40 | 254.954 |
cat("Best model:\n")
Best model:
cat(paste("- ", rownames(comp_mods)[1], "\n"))
# best model
if (!grepl("1 +", rownames(comp_mods)[1], fixed = TRUE))
summary_brm_model(brms_models[[rownames(comp_mods)[1]]], gsub.pattern = "experiment.type", gsub.replacement = "solo_vs_")
Estimate | Rhat | Bulk_ESS | iterations | chains | CI_low | CI_high | |
---|---|---|---|---|---|---|---|
solo_vs_real.group | 0.144 | 1 | 1774.521 | 5000 | 1 | -0.052 | 0.353 |
solo_vs_artificial.group | 0.340 | 1 | 1124.939 | 5000 | 1 | 0.084 | 0.587 |
solo_vs_solo:group.size | -0.017 | 1.014 | 160.991 | 5000 | 1 | -0.111 | 0.087 |
solo_vs_real.group:group.size | -0.191 | 1.009 | 177.928 | 5000 | 1 | -0.287 | -0.085 |
solo_vs_artificial.group:group.size | -0.171 | 1.005 | 193.597 | 5000 | 1 | -0.273 | -0.057 |
# contrasts
cat("Compare artificial vs real group gaps:\n")
Compare artificial vs real group gaps:
contrasts <- c(one = "experiment.typeartificial.group - experiment.typereal.group = 0")
hypothesis(brms_models[[rownames(comp_mods)[1]]], contrasts)
## Hypothesis Tests for class b:
## Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
## 1 one 0.2 0.16 -0.13 0.51 NA NA
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.
str(gaps_indiv)
indivs <- unique(gaps_indiv$indiv)
indivs <- indivs[indivs != "group"]
gaps_indiv_sex_l <- lapply(indivs, function(x){
X <- gaps_indiv[gaps_indiv$indiv == x, ]
X <- X[X$type == "indiv.group", ]
if(any(!is.na(X$sex)))
X$sex <- X$sex[!is.na(X$sex)][1]
if(any(!is.na(X$age)))
X$age <- X$age[!is.na(X$age)][1]
if(any(!is.na(X$reprod.stg)))
X$reprod.stg <- X$reprod.stg[!is.na(X$reprod.stg)][1]
X <- X[!is.na(X$sex), ]
return(X)
}
)
gaps_indiv_sex <- do.call(rbind, gaps_indiv_sex_l)
table(gaps_indiv_sex$sex, gaps_indiv_sex$experiment)
ggplot(gaps_indiv_sex, aes(x = sex, y = gaps)) +
geom_violin() +
facet_wrap(~ experiment) +
theme_classic()
# read as RDS
agg_pred <- readRDS("./data/processed/predicted_individual_in_group_flights_2020_&_2021.RDS")
agg_pred$orig.sound.files <- paste0(sapply(agg_pred$sound.files, function(x) strsplit(x, ".wav")[[1]][1]), ".wav")
agg_pred$start <- sapply(agg_pred$sound.files, function(x) seltab$orig.start[seltab$sound.files == x])
agg_pred$end <- sapply(agg_pred$sound.files, function(x) seltab$orig.end[seltab$sound.files == x])
agg_pred$middle.time <- (agg_pred$start + agg_pred$end) / 2
agg_pred$abbr_indiv <- as.numeric(as.factor(agg_pred$pred_indiv))
agg_pred$group2 <- make.names(agg_pred$group)
agg_pred <- agg_pred[!duplicated(agg_pred[, c("group", "abbr_indiv", "start", "end")]), ]
agg_pred$row <- 1:nrow(agg_pred)
# this are repeated
# agg_pred[c(200, 206, 156, 219, 30, 31, 9, 60, 160, 225, 486, 435, 438, 432), ]
agg_pred <- agg_pred[-c(200, 156, 30, 9, 160, 435, 432), ]
out <- lapply(unique(agg_pred$group2), function(x) {
print(x)
Y <- agg_pred[agg_pred$group2 == x, ]
# jpeg(filename = file.path("img/group_call_through_time", paste0(x, ".jpeg")), width = 480 * 3, height = 480 * 2, res = 200)
par(mar = c(1, 0, 0, 0), mfrow = c(4, 1))
Y$middle.time <- Y$middle.time - min(Y$middle.time)
num.ind <- as.numeric(as.factor(Y$abbr_indiv))
num.ind <- scale(num.ind, center = TRUE)
# sq <- seq(0, max(Y$middle.time), length.out = 5)
sq <- seq(0, 120, length.out = 5)
for (i in 1:(length(sq) - 1)) {
plot(0, 0, col = "white", xlim = c(sq[i] - 0.05, sq[i + 1] + 0.05), ylim = range(num.ind) + c(-2, 2), bty = "u")
points(x = Y$middle.time, y = num.ind, cex = 7, col = viridis(max(as.numeric(as.factor(Y$abbr_indiv))), alpha = 0.5)[as.numeric(as.factor(Y$abbr_indiv))], pch = 20)
# text(x = Y$middle.time, y = num.ind + rnorm(nrow(Y), sd = 0.8), Y$row, cex = 0.6, col = "black")
}
# dev.off()
})
lapply(unique(agg_pred$group), function(x) {
Y <- agg_pred[agg_pred$group == x, ]
mc <- markov.test(Y$abbr_indiv, type = "rank")
return(mc)
}
)
seq <- simulateMarkovChain(5000, matrix(0.25, 4, 4), states=c("a","c","g","t"))
markov.test(seq)
rnd.seq <- sample(seq)
markov.test(rnd.seq)
sequenceMatr <- createSequenceMatrix(sequence, sanitize = FALSE)
mcFitMLE <- markovchainFit(data = sequence)
rnd.seq <- sample(sequence)
rnd <- createSequenceMatrix(rnd.seq, sanitize = FALSE)
mcFitMLE2 <- markovchainFit(data = rnd.seq)
mcFitBSP <- markovchainFit(data = sequence, method = "bootstrap", nboot = 5, name = "Bootstrap Mc")
sim.sq <- rep(c("a", "b", "c", "d"), 100)
verifyMarkovProperty(sim.sq)
sim.sq2 <- sample(c("a", "b", "c", "d"), size = 100, replace = TRUE)
verifyMarkovProperty(sim.sq2)
weatherStates <- c("sunny", "cloudy", "rain")
byRow <- TRUE
weatherMatrix <- matrix(data = c(0.70, 0.2, 0.1,0.3, 0.4, 0.3, 0.2, 0.45, 0.35), byrow = byRow, nrow = 3,dimnames = list(weatherStates, weatherStates))
mcWeather <- new("markovchain", states = weatherStates, byrow = byRow, transitionMatrix = weatherMatrix, name = "Weather")
mcIgraph <- as(mcWeather, "igraph")
plot(mcIgraph)
Session information
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] purrr_0.3.4 tidybayes_3.0.1 ggridges_0.5.3 posterior_1.1.0
## [5] ggrepel_0.9.1 brms_2.16.3 Rcpp_1.0.8.3 soundgen_2.1.0
## [9] shinyBS_0.61 Sim.DiffProc_4.8 rlang_1.0.1 kableExtra_1.3.1
## [13] DT_0.18 viridis_0.6.2 viridisLite_0.4.0 pbapply_1.5-0
## [17] e1071_1.7-7 caret_6.0-88 ggplot2_3.3.5 lattice_0.20-44
## [21] ranger_0.13.1 readxl_1.3.1 warbleR_1.1.27 NatureSounds_1.0.4
## [25] knitr_1.38 seewave_2.1.8 tuneR_1.3.3.1 devtools_2.4.2
## [29] usethis_2.0.1 lubridate_1.7.10 data.table_1.14.0
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 tidyselect_1.1.1 lme4_1.1-27.1
## [4] fftw_1.0-6.1 htmlwidgets_1.5.3 grid_4.1.0
## [7] pROC_1.17.0.1 munsell_0.5.0 codetools_0.2-18
## [10] miniUI_0.1.1.1 withr_2.4.3 Brobdingnag_1.2-6
## [13] colorspace_2.0-2 highr_0.9 rstudioapi_0.13
## [16] stats4_4.1.0 dtw_1.22-3 bayesplot_1.8.1
## [19] labeling_0.4.2 rstan_2.21.2 farver_2.1.0
## [22] bridgesampling_1.1-2 rprojroot_2.0.2 coda_0.19-4
## [25] vctrs_0.3.8 generics_0.1.0 ipred_0.9-11
## [28] xfun_0.30 R6_2.5.1 markdown_1.1
## [31] HDInterval_0.2.2 gamm4_0.2-6 projpred_2.0.2
## [34] bitops_1.0-7 cachem_1.0.5 assertthat_0.2.1
## [37] promises_1.2.0.1 scales_1.1.1 nnet_7.3-16
## [40] gtable_0.3.0 processx_3.5.2 timeDate_3043.102
## [43] splines_4.1.0 ModelMetrics_1.2.2.2 checkmate_2.0.0
## [46] inline_0.3.19 yaml_2.3.5 reshape2_1.4.4
## [49] abind_1.4-5 threejs_0.3.3 crosstalk_1.1.1
## [52] backports_1.3.0 httpuv_1.6.1 rsconnect_0.8.18
## [55] tensorA_0.36.2 tools_4.1.0 lava_1.6.9
## [58] ellipsis_0.3.2 jquerylib_0.1.4 proxy_0.4-26
## [61] sessioninfo_1.1.1 plyr_1.8.6 base64enc_0.1-3
## [64] RCurl_1.98-1.6 ps_1.6.0 prettyunits_1.1.1
## [67] rpart_4.1-15 zoo_1.8-9 fs_1.5.0
## [70] magrittr_2.0.3 ggdist_3.0.0 colourpicker_1.1.0
## [73] mvtnorm_1.1-2 matrixStats_0.61.0 pkgload_1.2.1
## [76] shinyjs_2.0.0 mime_0.11 evaluate_0.15
## [79] arrayhelpers_1.1-0 xtable_1.8-4 shinystan_2.5.0
## [82] gridExtra_2.3 rstantools_2.1.1 testthat_3.0.4
## [85] compiler_4.1.0 tibble_3.1.6 V8_3.4.2
## [88] crayon_1.5.0 minqa_1.2.4 StanHeaders_2.21.0-7
## [91] htmltools_0.5.2 mgcv_1.8-36 later_1.2.0
## [94] tidyr_1.1.3 RcppParallel_5.1.4 DBI_1.1.1
## [97] MASS_7.3-54 boot_1.3-28 Matrix_1.3-4
## [100] cli_3.1.0 parallel_4.1.0 gower_0.2.2
## [103] igraph_1.2.6 pkgconfig_2.0.3 signal_0.7-7
## [106] recipes_0.1.16 svUnit_1.0.6 xml2_1.3.2
## [109] foreach_1.5.1 dygraphs_1.1.1.6 bslib_0.2.5.1
## [112] webshot_0.5.2 prodlim_2019.11.13 rvest_1.0.0
## [115] stringr_1.4.0 distributional_0.2.2 callr_3.7.0
## [118] digest_0.6.29 rmarkdown_2.13 cellranger_1.1.0
## [121] Deriv_4.1.3 curl_4.3.2 shiny_1.6.0
## [124] gtools_3.9.2 rjson_0.2.21 nloptr_1.2.2.2
## [127] lifecycle_1.0.1 nlme_3.1-152 jsonlite_1.8.0
## [130] desc_1.3.0 fansi_1.0.0 pillar_1.6.4
## [133] loo_2.4.1.9000 fastmap_1.1.0 httr_1.4.2
## [136] pkgbuild_1.2.0 survival_3.2-11 glue_1.6.2
## [139] xts_0.12.1 remotes_2.4.0 diffobj_0.3.4
## [142] shinythemes_1.2.0 iterators_1.0.13 class_7.3-19
## [145] stringi_1.7.6 sass_0.4.0 memoise_2.0.0
## [148] dplyr_1.0.7