Regressions are run in Stan through the R package brms
All models are run for 10,000 iterations, with the first half used as warm-up
We use weakly informative priors to regularize estimation while remaining biologically agnostic
Effect sizes are summarized as posterior medians with 95% credible intervals defined as highest posterior density intervals
Predictors whose credible intervals do not include zero are interpreted as having meaningful effects on the response variables
When group size is included as a predictor, it is treated as an ordered factor and modeled as a monotonic effect
Predictive performance is evaluated using leave-one-out cross-validation (via the leave-one-out Information Criterion) by comparing each model against its corresponding null model (i.e., excluding the group type predictor) using differences in expected log predictive density (ELPD)
Only models that outperform their null model (difference greater than twice the ELPD standard error) are retained for inference
Posterior predictive checks are used to evaluate model adequacy (default = 30 draws)
Model averaging for simulated data
For simulated data, we fit multiple models to account for variability in the stochastic simulation process
Each model corresponds to a different simulated dataset generated with a unique random seed
Posterior distributions are combined across all fitted models using LOO-based posterior model averaging
The resulting averaged posterior distribution is used to summarize fixed-effect estimates and uncertainty
opts_chunk$set(fig.width =12, fig.height =8)theme_set(theme_classic(base_size =24))fill_color <- viridis::mako(10, alpha =0.5)[7]graph_cols <-mako(4, begin =0, end =0.8)graph_cols_transp <-mako(4, begin =0, end =0.8, alpha =0.5)names(graph_cols_transp) <-names(graph_cols) <-c("solo", "natural", "artificial", "simulated")## settings for bayesian modelschains <-4# iterationsiter <-10000# if package cmdstanr is installed use cmdstanr as back endif ("cmdstanr"%in%installed.packages()[, "Package"]){options(brms.backend ="cmdstanr") }
Custom functions
Code
# Coefficient of variationcv <-function(x)sd(x) /mean(x) *100# Function to measure the euclidean distance in 3D space between bats in a group flight trajectorypairwise_dist <-function(X) {# keep only 3D coordinates data xyz <- X[, grep("^x|^y|^z", names(X))]# remove empty columns xyz <- xyz[, sapply(xyz, function(x)sum(is.na(x))) <10]# set to long format lng_xyz <-data.frame(x =stack(xyz[, grep("^x", names(xyz))])[, 1],y =stack(xyz[, grep("^y", names(xyz))])[, 1],z =stack(xyz[, grep("^z", names(xyz))]))names(lng_xyz)[names(lng_xyz) =="z.values"] <-"z" lng_xyz$ind <-gsub("z_", "", lng_xyz$z.ind) lng_xyz$z.ind <-NULL# lng_xyz$frame <- X$frame lng_xyz$frame <-1:nrow(X)# mean distance among all bats obs_dists <-sapply(unique(lng_xyz$frame), function(x)mean(dist(lng_xyz[lng_xyz$frame == x, c("x", "y", "z")]))) out <-data.frame(group = X$group[1],group.size =length(unique(lng_xyz$ind)),distance = obs_dists,type = X$type[1],frame =1:nrow(X) )# add group size as factor out$group.size.factor <-factor(out$group.size, levels =1:10, ordered =TRUE)return(out)}# function to create simulated data getting one individual trajectory from diferent groups for a total of individual equals to group_sizes[x]sim_group_traj <-function(group_sizes, seed, trajectories =FALSE) { trajs <- indiv_traj_list# randomize so each iteration is a differentset.seed(seed) trajs <-sample(trajs) dists_list <-list() group_num <-1 used_trajs <-list()for (e inseq_along(group_sizes)) { i <- group_sizes[e] sample_traj_indx <-which(!duplicated(names(trajs)))[1:i]set.seed(group_num + seed) sample_trajs <- trajs[sample_traj_indx]# force all to have the same number of frames min_frames <-min(sapply(sample_trajs, nrow)) sample_trajs <-lapply(sample_trajs, function(x) x[1:min_frames, ]) sample_traj_df <-do_call(cbind, sample_trajs)names(sample_traj_df) <-paste(c("x", "y", "z"), rep(1:i, 3), sep ="_") sample_traj_df$group <-paste("sim", group_num, sep ="-") sample_traj_df$type <-"simulated" dist_df <-pairwise_dist(sample_traj_df) dist_df$type <-"simulated"# add to output list dists_list[[length(dists_list) +1]] <- dist_df# remove sampled trajectories from trajs based on indices so idnviduals are only used once trajs <- trajs[-sample_traj_indx] group_num <- group_num +1names(sample_trajs) <-paste("sim", e, 1:length(sample_trajs), sep ="-") used_trajs <-c(used_trajs, sample_trajs) } dists_df <-do.call(rbind, dists_list)if (!trajectories)return(dists_df)elsereturn(used_trajs)}# function to simulate group calling from solo calling vectorssim_group_call <-function(group_sizes, seed) { indv_calls <-split(time_calls_indiv, list(time_calls_indiv$year.audio))# randomize so each iteration is a differentset.seed(seed) indv_calls <-sample(indv_calls)# dists_list <- list()# group_num <- 1# used_calls <- list() sim_call_list <-lapply(seq_along(group_sizes), function(i) { e <- group_sizes[i]# set.seed(e + seed) sampled_calls <- indv_calls[sample(1:length(indv_calls), e)] clms <-c("start", "end", "indiv", "year.audio") sampled_calls <-lapply(sampled_calls, function(x) x[, clms]) sample_calls_df <-do_call(rbind, sampled_calls) sample_calls_df$sound.files <-paste(i, e, sep ="-") sample_calls_df$year.audio sample_calls_df$group.size <- ereturn(sample_calls_df) }) sim_call_df <-do.call(rbind, sim_call_list)return(sim_call_df)}# Function to calculate contrasts based on a brms draws object# Function to calculate contrasts based on posterior draws and plot the posterior distributionsdraws_contrasts <-function(draws, predictor_name, basal_level,fill_color ="blue",beta.prefix ="^b_") {# Extract the model coefficients coef_names <-colnames(draws) intercept_name <-grep("^b_Intercept$", coef_names, value =TRUE) group_names <-grep(paste0(beta.prefix, predictor_name), coef_names, value =TRUE)# Determine the levels of the categorical predictor levels <-c(basal_level, sub(paste0(beta.prefix, predictor_name), "", group_names))# Calculate EMMs for each level group_values <-list() group_values[[1]] <- draws[[intercept_name]]for (i inseq_along(group_names)) { group_values[[i +1]] <- draws[[intercept_name]] + draws[[group_names[i]]] }# Compute pairwise contrasts contrast_list <-list() contrast_names <-combn(levels, 2, function(x)paste(x[2], "-", x[1]))for (i in1:length(contrast_names)) { contrast_list[[i]] <- group_values[[which(levels ==strsplit(contrast_names[i], " - ")[[1]][2])]] - group_values[[which(levels ==strsplit(contrast_names[i], " - ")[[1]][1])]] }names(contrast_list) <- contrast_names# Function to summarize contrasts summarize_contrast <-function(contrast) { mean_contrast <-mean(contrast) lower_contrast <-quantile(contrast, 0.025) upper_contrast <-quantile(contrast, 0.975)return(c(mean = mean_contrast,lower = lower_contrast,upper = upper_contrast )) }# Summarize each contrast summary_contrasts <-sapply(contrast_list, summarize_contrast, simplify ="array")# Convert summary to a data frame for plotting# contrast_summary_df <- data.frame(# Contrast = rep(names(contrast_list), each = 3),# Statistic = rep(c("Mean", "Lower", "Upper"), times = length(contrast_list)),# Value = as.vector(summary_contrasts)# )# flip summary summary_contrasts <-t(summary_contrasts)colnames(summary_contrasts) <-c("est", "l-95% CI", "u-95% CI") contrast_summary_df <- summary_contrasts summary_contrasts <- brmsish:::html_format_coef_table(as.data.frame(summary_contrasts),fill = fill_color,highlight =TRUE)# Plot the posterior distributions of contrasts contrast_df <-do.call(rbind, lapply(names(contrast_list), function(name) {data.frame(Contrast = name, Value = contrast_list[[name]]) })) p <-ggplot(contrast_df, aes(x = Value)) +geom_density(alpha =0.6,fill = fill_color,color = fill_color) +geom_vline(xintercept =0, linetype ="dotted") +facet_wrap( ~ Contrast, scales ="free_y", ncol =1) +theme_minimal() +theme(axis.title.y =element_blank(),axis.text.y =element_blank(),axis.ticks.y =element_blank(),plot.title =element_blank() ) results <-list(plot = p, contrasts_df = contrast_summary_df, contrasts_html = summary_contrasts)return(results) }# to create several posterior predictive check plots out of a brms fitcustom_ppc <-function(fit, group =NULL, ndraws =30) { by_group <-if (!is.null(group)) {TRUE } elseFALSEif (by_group) by_group <-if (any(names(fit$data) == group)) {TRUE } elseFALSEif (by_group) by_group <-if (is.character(fit$data[, group]) |is.factor(fit$data[, group])) {TRUE } elseFALSEif (by_group) { ppc_dens <-pp_check(fit,ndraws = ndraws,type ='dens_overlay_grouped',group = group) pp_mean <-pp_check( fit,type ="stat_grouped",stat ="mean",group = group,ndraws = ndraws ) +theme_classic() pp_scat <-pp_check(fit, type ="scatter_avg", # group = group,ndraws = ndraws) } else { ppc_dens <-pp_check(fit, ndraws = ndraws, type ='dens_overlay') pp_mean <-pp_check(fit,type ="stat",stat ="mean",ndraws = ndraws) +theme_classic() pp_scat <-pp_check(fit, type ="scatter_avg", ndraws = ndraws) } pp_stat2 <-pp_check(fit, type ="stat_2d", ndraws = ndraws) pp_plot_list <-list(ppc_dens, pp_mean, pp_scat, pp_stat2) pp_plot_list[c(1, 3:4)] <-lapply(pp_plot_list[c(1, 3:4)], function(x) x +scale_color_viridis_d(begin =0.3,end =0.8,alpha =0.5,option ="mako", ) +scale_fill_viridis_d(begin =0.3,end =0.8,alpha =0.5,option ="mako" ) +theme_classic()) ppc_plot <-plot_grid(plotlist = pp_plot_list, ncol =2)print(ppc_plot)}
Compute pairwise distances for natural and artificial groups:
Code
pairwise_dists_list <-sapply(split(group_traj, group_traj$group), pairwise_dist, simplify =FALSE)pairwise_dists <-do.call(rbind, pairwise_dists_list)# remove those with unlikely distancepairwise_dists <- pairwise_dists[pairwise_dists$distance >0.1& pairwise_dists$distance <7, ]
Generate simulated group flight trajectories:
Code
## split into single individual trajectories# split in a listsplit_group_traj <-split(group_traj, group_traj$group)# get single individual trajectoresindiv_traj_list <-lapply(split_group_traj, function(x) { x <- x[, sapply(x, function(x)sum(is.na(x))) <10]# get number of individualssuppressWarnings(indv_num <-unique(na.omit(as.numeric(gsub(".*?([0-9]+).*", "\\1", names(x)) )))) indiv_trajs <-lapply(indv_num, function(y) x[, grep(y, names(x))])return(indiv_trajs)})indiv_traj_list <-unlist(indiv_traj_list, recursive =FALSE, use.names =TRUE)# remove last character of the list names so all have the original group namenames(indiv_traj_list) <-gsub(".$", "", names(indiv_traj_list))# get number of individuals per groupn_per_group <-sapply(unique(group_traj$group), USE.NAMES =FALSE, function(x) {# get those for that group X <- group_traj[group_traj$group == x, ]# count number of individuals (No NAs) n_indiv <- (sum(!sapply(X, anyNA)) -3) /3return(n_indiv) })# mean number of individuals per group size in original datamean_n_indiv <-floor(mean(table(n_per_group)))group_sizes <-as.numeric(names(table(n_per_group)))group_sizes <-rep(group_sizes, each = mean_n_indiv)sim_dists <-lapply(1:30, function(x)sim_group_traj(group_sizes, seed = x))sim_trajs_list <-lapply(1:30, function(x)sim_group_traj(group_sizes, seed = x, trajectories =TRUE))# remove those with unlikely distancesim_dists <-lapply(sim_dists, function(x) { x <- x[x$distance >0.1& x$distance <7.81, ]return(x)})
1.2 Descriptive stats
Number of trails: 34
Number of groups per group size:
Code
table(n_per_group)
n_per_group
2 3 4 5 6
7 8 7 8 4
Mean distance between individuals in a group flight:
Code
sim_dists_df <-do.call(rbind, sim_dists)# mean distance by group typeagg <-aggregate(distance ~ type, sim_dists_df, mean)agg$sd <-aggregate(distance ~ type, sim_dists_df, sd)$distance# mean distance by group typeagg2 <-aggregate(distance ~ type, pairwise_dists, mean)agg2$sd <-aggregate(distance ~ type, pairwise_dists, sd)$distancerbind(agg, agg2)
type
distance
sd
simulated
2.050082
0.5696759
artificial
2.005765
0.6803455
natural
1.670052
0.7256127
Closest distance between individuals in a group flight:
Code
sim_dists_df <-do.call(rbind, sim_dists)# mean distance by group typeagg_min <-aggregate(distance ~ type + group, sim_dists_df, min)agg <-aggregate(distance ~ type, agg_min, mean)agg$sd <-aggregate(distance ~ type, agg_min, sd)$distance# mean distance by group typeagg_min2 <-aggregate(distance ~ type + group, pairwise_dists, min)agg2 <-aggregate(distance ~ type, agg_min2, mean)agg2$sd <-aggregate(distance ~ type, agg_min2, sd)$distancerbind(agg, agg2)
Data: combined natural, artificial and simulated group flight distance data
30 data sets were created with varying simulated group flight trajectories to acount for stochasticity in the simulation process
Add the group roost entry data to each simulated set and run a bayesian mixed effects model coordination on roost entry for each data set
The mean pairwise spatial distance between individuals at each frame was used as the response variable (modeled with a gaussian distribution), while group type (natural, artificial or simulated) was used as predictor
Group ID and group size were included as varying effects
The posterior distributions from the 30 models were combined into a single model fit for statistical inference
For each data set a model without the group type predictor was also fitted as a null model to further assess model fit
Weakly informative priors were used for all model parameters
Time frames were modeled using an AR(2) autocorrelation structure to account for temporal autocorrelation in the data
Code
# weakly informative priorspriors_flight_coord <-c(# Interceptprior(normal(0, 2), class ="Intercept"),# Fixed effects (including monotonic effects)prior(normal(0, 1), class ="b"),# Residual standard deviationprior(student_t(3, 0, 2.5), class ="sigma"),# AR(2) coefficientsprior(normal(0, 0.5), class ="ar"))mods_flight_coord <-brm_multiple(formula = distance ~ type +mo(group.size.factor) +ar(p =2, time = frame, gr = group),iter = iter,thin =1,data = nat_sim_dist_list,family =gaussian(),silent =2,prior = priors_flight_coord,chains = chains,cores = chains,combine =FALSE,control =list(adapt_delta =0.99, max_treedepth =15))custom_ppc(fit = mods_flight_coord[[1]], group ="type")mods_flight_coord <-pblapply(mods_flight_coord, function(x)add_criterion(x, criterion =c("loo")))saveRDS( mods_flight_coord,"./data/processed/model_list_flight_coordination_monotonic_weak_priors.RDS")# weakly informative priorspriors_null_flight_coord <-c(# Interceptprior(normal(0, 2), class ="Intercept"),# Residual standard deviationprior(student_t(3, 0, 2.5), class ="sigma"),# AR(2) coefficientsprior(normal(0, 0.5), class ="ar"))null_mods_flight_coord <-brm_multiple(formula = distance ~1+ar(p =2, time = frame, gr = group),iter = iter,thin =1,data = nat_sim_dist_list,family =gaussian(),silent =2,chains = chains,prior = priors_null_flight_coord,cores = chains,combine =FALSE,control =list(adapt_delta =0.99, max_treedepth =15))null_mods_flight_coord <-pblapply(null_mods_flight_coord, function(x)add_criterion(x, criterion =c("loo")))saveRDS( null_mods_flight_coord,"./data/processed/nullmodel_list_flight_coordination_monotonic_weak_priors.RDS")
1.3.1 Results
1.3.1.1 Fitted model vs null model
Mean ELPD (expected log predictive density) and associated standard deviation between each model and its correspondent null model:
Calculate time lapse between successive individuals when entering the roost for each group:
Code
group_dat <- dat[!is.infinite(dat$entry.time) &!is.na(dat$entry.time) & dat$type !="Individual", ]# get time difference between entriesgroup_dat_l <-lapply(unique(group_dat$group), function(x) {# print(x) X <- group_dat[group_dat$group == x, ] X <- X[!is.na(X$entry.time), ] X <- X[order(X$entry.time), ]# X$entry.time.diff <- X$entry.time - min(X$entry.time) X$entry.time.diff <-c(NA, X$entry.time[-1] - X$entry.time[-nrow(X)]) X <- X[!is.na(X$entry.time.diff), ]# add 1 milisecond to 0 X$entry.time.diff[X$entry.time.diff ==0] <-NA X$group.size <-if (nrow(X) >0)nrow(X) -1elsevector()return(X)})group_dat <-do.call(rbind, group_dat_l)
2.2 Simulating entry time difference
Simulated 30 data sets, each one with a number of simulated groups for each group size (2 to 5 individuals) similar to those in the natural group data. This approach allowed us to propagate uncertainty from random group simulations into our statistical inference.
Code
group_dat <- dat[!is.infinite(dat$entry.time) &!is.na(dat$entry.time) & dat$type !="Individual", ]# get difference to first entrygroup_dat_l <-lapply(unique(group_dat$group), function(x) {# print(x) X <- group_dat[group_dat$group == x, ] X <- X[!is.na(X$entry.time), ] X <- X[order(X$entry.time), ] X$entry.time.diff <-c(NA, X$entry.time[-1] - X$entry.time[-nrow(X)]) X <- X[!is.na(X$entry.time.diff), ] X$group.size <-if (nrow(X) >0)nrow(X) +1elsevector()return(X)})group_dat <-do.call(rbind, group_dat_l)group_dat$type <-factor(group_dat$type, levels =c("Artificial", "Natural", "Simulated"))# make random groups from individual flightsindiv_dat <- dat[!is.infinite(dat$entry.time) &!is.na(dat$entry.time) & dat$type =="Individual", ]indivs <-unique(indiv_dat$Individuo)group_sizes <- group_dat$group.size[!duplicated(group_dat$Video)]# use only group sizes 2:5group_sizes <- group_sizes[group_sizes <=5]table(group_sizes)
Data: combined natural, artificial and simulated group flight distance data
30 data sets were created with varying simulated group flight trajectories to account for stochasticity in the simulation process
The posterior distributions from the 30 models were combined into a single model fit for statistical inference
The difference in time between when entering the roost between consecutive individuals was used as the response variable (after log transformation, modeled with a normal distribution), while group type (natural, artificial or simulated) was used as predictor
Group ID was included as varying effect
For each data set a model without the group type predictor was also fitted as a null model to further assess model fit
# set effect contrasts to annotate them on the graphannots <-as.data.frame((contrasts$contrasts_df))# convert values into percentage fasterannots$est <-exp(annots$est)annots$`u-95% CI`<-exp(annots$`u-95% CI`)annots$`l-95% CI`<-exp(annots$`l-95% CI`)annots
coefs <- brmsish:::html_format_coef_table(contrasts, highlight =TRUE, fill = fill_color)coefs
Contrasts
Estimate
Est.Error
l-95% CI
u-95% CI
1
solo VS natural.group
-0.230
0.132
0.033
-0.487
2
solo VS artificial.group
0.384
0.164
0.707
0.063
3
natural.group VS artificial.group
0.614
0.208
0.210
1.025
Converted into times of fewer calls / min :
Code
# set effect contrasts to annotate them on the graphannots <-as.data.frame((contrasts))# convert values into percentage of more calls annots$Estimate <-exp(annots$Estimate)annots$`u-95% CI`<-exp(annots$`u-95% CI`)annots$`l-95% CI`<-exp(annots$`l-95% CI`)rownames(annots) <- annots$Contrastsannots$Contrasts <-NULLannots
Data: combined natural, artificial and simulated group flight distance data
30 data sets were created with varying simulated group flight trajectories to account for stochasticity in the simulation process
The posterior distributions from the 30 models were combined into a single model fit for statistical inference
The difference in time between when entering the roost between consecutive individuals was used as the response variable (after log transformation, modeled with a normal distribution), while group type (natural, artificial or simulated) was used as predictor
Group ID was included as varying effect
For each data set a model without the group type predictor was also fitted as a null model to further assess model fit
Code
# make group size a factorcall_rate_indiv$group.size.f <-factor(call_rate_indiv$group.size, ordered =TRUE)# weakly informative priorspriors_calls_indiv <-c(# Intercept: baseline log call rateprior(normal(0, 1.5), class ="Intercept"),# Fixed effects (type + monotonic group size)prior(normal(0, 1), class ="b"),# Individual-level random intercept SDprior(student_t(3, 0, 1), class ="sd"),# Negative binomial overdispersion (shape)prior(exponential(1), class ="shape"))mod <-brm(formula = calls |resp_rate(flight.time) ~ type +mo(group.size.f) + (1| indiv),iter = iter,thin =1,data = call_rate_indiv,family =negbinomial(),silent =2,chains = chains,cores = chains, prior = priors_calls_indiv,control =list(adapt_delta =0.99, max_treedepth =15),file ="./data/processed/individual_call_rate_solo_vs_group_group_size_monot_negbinomial_weak_priors",file_refit ="always" )mod <-add_criterion(mod, criterion ="loo") # weakly informative priorspriors_calls_indiv_null <-c(# Intercept: baseline log call rateprior(normal(0, 1.5), class ="Intercept"),# Individual-level random intercept SDprior(student_t(3, 0, 1), class ="sd"),# Negative binomial overdispersion (shape)prior(exponential(1), class ="shape")) null_mod <-brm(formula = calls |resp_rate(flight.time) ~1+ (1| indiv),iter = iter,thin =1,data = call_rate_indiv,family =negbinomial(),silent =2,chains = chains,cores = chains, prior = priors_calls_indiv_null,control =list(adapt_delta =0.99, max_treedepth =15),file ="./data/processed/null_model_individual_call_rate_solo_vs_group_group_size_monot_negbinomial_weak_priors",file_refit ="always" )null_mod <-add_criterion(null_mod, criterion ="loo")
coefs <- brmsish:::html_format_coef_table(contrasts, highlight =TRUE, fill = fill_color)coefs
Contrasts
Estimate
Est.Error
l-95% CI
u-95% CI
1
natural.group VS solo
-1.269
0.112
-1.487
-1.050
2
natural.group VS artificial.group
0.175
0.147
-0.112
0.464
3
solo VS artificial.group
1.444
0.123
1.684
1.201
Converted into mutliplicative differences:
Code
contrasts[3, 2:5] <- contrasts[3, 2:5] *-1contrasts[3, 1] <-"artificial.group VS solo"# set effect contrasts to annotate them on the graphannots <- annots2 <-as.data.frame((contrasts))# convert values into percentage of more callsannots$Estimate <-1/ (exp(annots$Estimate)) annots$l <- annots$`l-95% CI`annots$`l-95% CI`<-1/ (exp(annots$`u-95% CI`))annots$`u-95% CI`<-1/ (exp(annots$l))annots$l <-NULLrownames(annots) <- annots$Contrastsannots$Contrasts <-NULLannots
Individuals in natural and artificial groups produce lower call rates than in solo flights
No difference in call rate between natural and artificial groups
No effect of group size
3.2 Call overlap
3.2.1 Prepare data
Code
total_count_df <-readRDS("./data/processed/total_ovlp_count.RDS")# convert to overlaps per minutetotal_count_df$ovlp.rate <- total_count_df$ovlp.rate *60
# set effect contrasts to annotate them on the graphannots <-as.data.frame((contrasts$contrasts_df))# convert values into percentage of more callsannots$est <-round(exp(annots$est), 2)annots$l <- annots$`l-95% CI`annots$`l-95% CI`<-round(exp(annots$`u-95% CI`), 2)annots$`u-95% CI`<-round(exp(annots$l), 2)annots$l <-NULLannots
est
l-95% CI
u-95% CI
Natural - Simulated
10.38
15.25
7.26
Artificial - Simulated
61.42
153.92
28.30
Artificial - Natural
5.92
15.45
2.54
Simulated produce 10.38 times more overlaps than natural groups
Simulated produce 61.42 times more overlaps than artificial groups
Artificial produce 5.92 times more overlaps than natural groups
Plot raw data:
Code
agg_cnt <-aggregate(ovlp.rate ~ group.size + type, data = total_count_df, FUN = mean)agg_cnt$sd <-aggregate(ovlp.rate ~ group.size + type, data = total_count_df, FUN = sd)$ovlp.ratecols <- graph_cols[2:4]names(cols) <-paste(c("Natural", "Artificial", "Simulated"), "group")agg_cnt$type <-paste(agg_cnt$type, "group")agg_cnt$type <-factor(agg_cnt$type, levels =c("Natural group", "Artificial group", "Simulated group"))# convert previous graph in a point sd graph with number of overlaps in y and group size as factor in xgg_overlap <-ggplot(agg_cnt, aes(x =factor(group.size), y = ovlp.rate, color = type)) +geom_point(position =position_dodge(width =0.5), alpha =0.5, size =3) +geom_errorbar(aes(ymin = ovlp.rate - sd, ymax = ovlp.rate + sd), size =1, width =0, position =position_dodge(width =0.5), show.legend =FALSE, alpha =0.5) +scale_color_manual(values = cols) +labs(x ="Group size", y ="Overlaps / min", color ="") +theme(legend.position ="inside", legend.position.inside =c(0.2, 0.7)) gg_overlap
3.2.2.4 Posterior predictive checks
Code
custom_ppc(fit = call_overlap_mod[[1]], group ="type")
Takeaways
Simulated groups produce more overlapping calls than both artificial and natural groups
Artificial groups produce more overlapping calls than natural groups
The proportion of individuals that called in a group was used as the response variable (modeled with a zero-inflated beta distribution and a probit function), while group type (natural, artificial or simulated) was used as predictor
Group ID was included as varying effect
A model without the group type predictor was also fitted as a null model to further assess model fit
Code
# weakly informative priorspriors_prop <-c(# Mean (mu) modelprior(normal(0, 1.5), class ="Intercept"),prior(normal(0, 1), class ="b"),# Zero–one inflation (logit scale)prior(normal(0, 1.5), class ="zoi"),prior(normal(0, 1.5), class ="coi"),# Precision (phi > 0)prior(exponential(1), class ="phi"))mod <-brm(formula = prop.callers ~ type +mo(group.size.f),iter = iter,thin =1,data = prop_callers_data,family =zero_one_inflated_beta(link ="probit"),silent =2,chains = chains,cores = chains, prior = priors_prop,control =list(adapt_delta =0.99,max_treedepth =15),file_refit ="always",file ="./data/processed/unique.callers_by_experiment_type_group_size_monot_weak_priors" )custom_ppc(mod, group ="type")mod <-add_criterion(mod, criterion ="loo") # weakly informative priorspriors_prop_null <-c(# Mean (mu) modelprior(normal(0, 1.5), class ="Intercept"),# Zero–one inflation (logit scale)prior(normal(0, 1.5), class ="zoi"),prior(normal(0, 1.5), class ="coi"),# Precision (phi > 0)prior(exponential(1), class ="phi"))null_mod <-brm(formula = prop.callers ~1+mo(group.size.f),iter = iter,thin =1,data = prop_callers_data,family =zero_one_inflated_beta(link ="probit"),silent =2,chains = chains,cores = chains, prior = priors_prop_null,control =list(adapt_delta =0.99,max_treedepth =15),file_refit ="always",file ="./data/processed/null_model_unique.callers_by_experiment_type_group_size_monot_weak_priors" )null_mod <-add_criterion(null_mod, criterion ="loo")