Statistical analysis

Familiarity enhances efficiency in complex collective tasks

Author

Marcelo Araya-Salas, Damien R. Farine, Holger Goerlitz, Nazareth Rojas-Rojas, Silvia Chaves-Ramírez, Mariela Sánchez-Chavarría & Gloriana Chaverri

Data analysis for the paper

Marcelo Araya-Salas, Damien R. Farine, Holger Goerlitz, Nazareth Rojas-Rojas, Silvia Chaves-Ramírez, Mariela Sánchez-Chavarría, Gloriana Chaverri. Familiarity enhances efficiency in complex collective tasks

Set root directory

Users can clone the github repository and set the root directory to the cloned repository to run the code and reproduce the analyses. Replace “..” with the path to the cloned repository on your computer. For example, if you cloned the repository in a folder called “familiarity-collective-tasks” on your desktop, you would set the root directory to “~/Desktop/familiarity-collective-tasks/”…

Code

# set working directory 
knitr::opts_knit$set(root.dir =  "..")

Model fitting and evaluation

Regressions are run in Stan through the R package brms
All models are run for 10,000 iterations, with the first half used as warm-up
We use weakly informative priors to regularize estimation while remaining biologically agnostic
Effect sizes are summarized as posterior medians with 95% credible intervals defined as highest posterior density intervals
Predictors whose credible intervals do not include zero are interpreted as having meaningful effects on the response variables
When group size is included as a predictor, it is treated as an ordered factor and modeled as a monotonic effect
Predictive performance is evaluated using leave-one-out cross-validation (via the leave-one-out Information Criterion) by comparing each model against its corresponding null model (i.e., excluding the group type predictor) using differences in expected log predictive density (ELPD)
Only models that outperform their null model (difference greater than twice the ELPD standard error) are retained for inference
Posterior predictive checks are used to evaluate model adequacy (default = 30 draws)

Model averaging for simulated data

For simulated data, we fit multiple models to account for variability in the stochastic simulation process
Each model corresponds to a different simulated dataset generated with a unique random seed
Posterior distributions are combined across all fitted models using LOO-based posterior model averaging
The resulting averaged posterior distribution is used to summarize fixed-effect estimates and uncertainty

Load packages

Code

# install/ load packages
sketchy::load_packages(
  packages = c(
    "viridis",
    "ggplot2",
    "knitr",
    "readxl",
    "pbapply",
    "brms",
    "kableExtra",
    "ggridges",
    github = "maRce10/brmsish",
    "cowplot",
    "warbleR",
    "ggsignif",
    "ggdist",
    github = "erocoar/gghalves",
    "loo",
    "extrafont"
  )
)

Configure global settings

Code

opts_chunk$set(fig.width = 12, fig.height = 8)

theme_set(theme_classic(base_size = 24))

fill_color <- viridis::mako(10, alpha = 0.5)[7]

graph_cols <- mako(4, begin = 0, end = 0.8)

graph_cols_transp <- mako(4, begin = 0, end = 0.8, alpha = 0.5)

names(graph_cols_transp)  <- names(graph_cols) <- c("solo", "natural", "artificial", "simulated")

## settings for bayesian models
chains <- 4

# iterations
iter <- 10000

# if package cmdstanr is installed use cmdstanr as back end
 if ("cmdstanr" %in% installed.packages()[, "Package"]){
   options(brms.backend = "cmdstanr")
 }

Custom functions

Code

# Coefficient of variation
cv <- function(x)
  sd(x) / mean(x) * 100

# Function to measure the euclidean distance in 3D space between bats in a group flight trajectory
pairwise_dist <- function(X) {
  # keep only 3D coordinates data
  xyz <- X[, grep("^x|^y|^z", names(X))]
  
  # remove empty columns
  xyz <- xyz[, sapply(xyz, function(x)
    sum(is.na(x))) < 10]
  
  # set to long format
  lng_xyz  <-
    data.frame(x = stack(xyz[, grep("^x", names(xyz))])[, 1],
               y = stack(xyz[, grep("^y", names(xyz))])[, 1],
               z = stack(xyz[, grep("^z", names(xyz))]))
  
  names(lng_xyz)[names(lng_xyz) ==  "z.values"] <- "z"
  
  lng_xyz$ind <- gsub("z_", "", lng_xyz$z.ind)
  lng_xyz$z.ind <- NULL
  # lng_xyz$frame <- X$frame
  lng_xyz$frame <- 1:nrow(X)
  
  # mean distance among all bats
  obs_dists <-
    sapply(unique(lng_xyz$frame), function(x)
      mean(dist(lng_xyz[lng_xyz$frame == x, c("x", "y", "z")])))
  
  out <-
    data.frame(
      group = X$group[1],
      group.size = length(unique(lng_xyz$ind)),
      distance = obs_dists,
      type = X$type[1],
      frame = 1:nrow(X)
    )
  
  # add group size as factor
  out$group.size.factor <- factor(out$group.size, levels = 1:10, ordered = TRUE)
  return(out)
}

# function to create simulated data getting one individual trajectory from diferent groups for a total of individual equals to group_sizes[x]
sim_group_traj <- function(group_sizes, seed, trajectories = FALSE) {
  trajs <- indiv_traj_list
  
  # randomize so each iteration is a different
  set.seed(seed)
  trajs <- sample(trajs)
  
  dists_list <- list()
  
  group_num <- 1
  
  used_trajs <- list()
  
  
  for (e in seq_along(group_sizes)) {
    i <- group_sizes[e]
    sample_traj_indx <- which(!duplicated(names(trajs)))[1:i]
    
    set.seed(group_num + seed)
    sample_trajs <- trajs[sample_traj_indx]
    
    # force all to have the same number of frames
    min_frames <- min(sapply(sample_trajs, nrow))
    
    sample_trajs <-
      lapply(sample_trajs, function(x)
        x[1:min_frames, ])
    
    sample_traj_df <- do_call(cbind, sample_trajs)
    
    names(sample_traj_df) <-
      paste(c("x", "y", "z"), rep(1:i, 3), sep = "_")
    
    sample_traj_df$group <- paste("sim", group_num, sep = "-")
    sample_traj_df$type <- "simulated"
    
    dist_df <- pairwise_dist(sample_traj_df)
    dist_df$type <- "simulated"
    
    # add to output list
    dists_list[[length(dists_list) + 1]] <- dist_df
    
    # remove sampled trajectories from trajs based on indices so idnviduals are only used once
    trajs <- trajs[-sample_traj_indx]
    group_num <- group_num + 1
    names(sample_trajs) <- paste("sim", e, 1:length(sample_trajs), sep = "-")
    used_trajs <- c(used_trajs, sample_trajs)
  }
  
  dists_df <- do.call(rbind, dists_list)
  
  if (!trajectories)
    return(dists_df)
  else
    return(used_trajs)
}

# function to simulate group calling from solo calling vectors
sim_group_call <- function(group_sizes, seed) {
  indv_calls <- split(time_calls_indiv, list(time_calls_indiv$year.audio))
  
  # randomize so each iteration is a different
  set.seed(seed)
  indv_calls <- sample(indv_calls)
  
  # dists_list <- list()
  
  # group_num <- 1
  
  # used_calls <- list()
  
  
  sim_call_list <- lapply(seq_along(group_sizes), function(i) {
    e <- group_sizes[i]
    # set.seed(e + seed)
    sampled_calls <- indv_calls[sample(1:length(indv_calls), e)]
    
    clms <- c("start", "end", "indiv", "year.audio")
    
    sampled_calls <- lapply(sampled_calls, function(x)
      x[, clms])
    
    sample_calls_df <- do_call(rbind, sampled_calls)
    
    sample_calls_df$sound.files <- paste(i, e, sep = "-")
    sample_calls_df$year.audio
    
    sample_calls_df$group.size <- e
    
    
    return(sample_calls_df)
  })
  
  sim_call_df <- do.call(rbind, sim_call_list)
  
  return(sim_call_df)
}

# Function to calculate contrasts based on a brms draws object
# Function to calculate contrasts based on posterior draws and plot the posterior distributions
draws_contrasts <-
  function(draws,
           predictor_name,
           basal_level,
           fill_color = "blue",
           beta.prefix = "^b_") {
    # Extract the model coefficients
    coef_names <- colnames(draws)
    intercept_name <-
      grep("^b_Intercept$", coef_names, value = TRUE)
    group_names <-
      grep(paste0(beta.prefix, predictor_name), coef_names, value = TRUE)
    
    # Determine the levels of the categorical predictor
    levels <-
      c(basal_level, sub(paste0(beta.prefix, predictor_name), "", group_names))
    
    # Calculate EMMs for each level
    group_values <- list()
    group_values[[1]] <- draws[[intercept_name]]
    for (i in seq_along(group_names)) {
      group_values[[i + 1]] <-
        draws[[intercept_name]] + draws[[group_names[i]]]
    }
    
    # Compute pairwise contrasts
    contrast_list <- list()
    contrast_names <-
      combn(levels, 2, function(x)
        paste(x[2], "-", x[1]))
    for (i in 1:length(contrast_names)) {
      contrast_list[[i]] <-
        group_values[[which(levels == strsplit(contrast_names[i], " - ")[[1]][2])]] -
        group_values[[which(levels == strsplit(contrast_names[i], " - ")[[1]][1])]]
    }
    names(contrast_list) <- contrast_names
    
    # Function to summarize contrasts
    summarize_contrast <- function(contrast) {
      mean_contrast <- mean(contrast)
      lower_contrast <- quantile(contrast, 0.025)
      upper_contrast <- quantile(contrast, 0.975)
      return(c(
        mean = mean_contrast,
        lower = lower_contrast,
        upper = upper_contrast
      ))
    }
    
    # Summarize each contrast
    summary_contrasts <-
      sapply(contrast_list, summarize_contrast, simplify = "array")
    
    # Convert summary to a data frame for plotting
    # contrast_summary_df <- data.frame(
    #   Contrast = rep(names(contrast_list), each = 3),
    #   Statistic = rep(c("Mean", "Lower", "Upper"), times = length(contrast_list)),
    #   Value = as.vector(summary_contrasts)
    # )
    
    # flip summary
    summary_contrasts <- t(summary_contrasts)
    colnames(summary_contrasts) <- c("est", "l-95% CI", "u-95% CI")
    contrast_summary_df <- summary_contrasts
    summary_contrasts <- brmsish:::html_format_coef_table(as.data.frame(summary_contrasts),
                                                          fill = fill_color,
                                                          highlight = TRUE)
    
    
    # Plot the posterior distributions of contrasts
    contrast_df <-
      do.call(rbind, lapply(names(contrast_list), function(name) {
        data.frame(Contrast = name, Value = contrast_list[[name]])
      }))
    
    p <- ggplot(contrast_df, aes(x = Value)) +
      geom_density(alpha = 0.6,
                   fill = fill_color,
                   color = fill_color) +
      geom_vline(xintercept = 0, linetype = "dotted") +
      facet_wrap( ~ Contrast, scales = "free_y", ncol = 1) +
      theme_minimal() +
      theme(
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        plot.title = element_blank()
      )
    
    results <- list(plot = p, contrasts_df = contrast_summary_df, contrasts_html = summary_contrasts)
    
    return(results)
  }

# to create several posterior predictive check plots out of a brms fit
custom_ppc <- function(fit, group = NULL, ndraws = 30) {
  by_group  <- if (!is.null(group)) {
    TRUE
  } else
    FALSE
  
  if (by_group)
    by_group  <-  if (any(names(fit$data) == group)) {
      TRUE
    } else
      FALSE
  
  if (by_group)
    by_group <-
      if (is.character(fit$data[, group]) |
          is.factor(fit$data[, group])) {
        TRUE
      } else
        FALSE
  
  
  if (by_group) {
    ppc_dens <- pp_check(fit,
                         ndraws = ndraws,
                         type = 'dens_overlay_grouped',
                         group = group)
    
    pp_mean <- pp_check(
      fit,
      type = "stat_grouped",
      stat = "mean",
      group = group,
      ndraws = ndraws
    )  + theme_classic()
    
    pp_scat <- pp_check(fit, type = "scatter_avg", # group = group,
                        ndraws = ndraws)
  } else {
    ppc_dens <- pp_check(fit, ndraws = ndraws, type = 'dens_overlay')
    
    pp_mean <- pp_check(fit,
                        type = "stat",
                        stat = "mean",
                        ndraws = ndraws) + theme_classic()
    
    pp_scat <-  pp_check(fit, type = "scatter_avg", ndraws = ndraws)
  }
  
  pp_stat2 <- pp_check(fit, type = "stat_2d", ndraws = ndraws)
  
  pp_plot_list <-
    list(ppc_dens, pp_mean, pp_scat, pp_stat2)
  
  pp_plot_list[c(1, 3:4)] <-
    lapply(pp_plot_list[c(1, 3:4)], function(x)
      x  + scale_color_viridis_d(
        begin = 0.3,
        end = 0.8,
        alpha = 0.5,
        option = "mako",
      ) + scale_fill_viridis_d(
        begin = 0.3,
        end = 0.8,
        alpha = 0.5,
        option = "mako"
      ) + theme_classic())
  
  
  ppc_plot <- plot_grid(plotlist = pp_plot_list, ncol = 2)
  
  print(ppc_plot)
}


# custom function based on gghalves::geom_half_point
geom_half_point <- function(
  mapping = NULL,
  data = NULL,
  side = "l",
  range_scale = 0.4,
  alpha = 1,
  pch = 16,
  size = 1.5,
  transformation = NULL,   # <- absorb but ignore
  ...
) {

  ggplot2::geom_point(
    mapping = mapping,
    data = data,
    alpha = alpha,
    shape = pch,
    size = size,
    position = position_half_jitter(
      width = 0.1,
      height = 0,
      side = side,
      range_scale = range_scale
    ),
    ...
  )
}

position_half_jitter <- function(width = 0.1,
                                 height = 0,
                                 side = "l",
                                 range_scale = 0.4) {

  ggplot2::ggproto(
    NULL, ggplot2::Position,

    compute_panel = function(self, data, params, scales) {

      # apply regular jitter first
      data$x <- data$x + stats::runif(
        nrow(data),
        -width / 2,
        width / 2
      )

      # shift to one side
      shift <- if (side %in% c("l", "left")) {
        -range_scale / 2
      } else {
        range_scale / 2
      }

      data$x <- data$x + shift

      data
    }
  )
}

Stats

1 Group flight coordination

1.1 Prepare data

Read data:

Code

group_traj <- read.csv("./data/raw/flight_trajectories_by_group.csv")

Compute pairwise distances for natural and artificial groups:

Code

pairwise_dists_list <-
  sapply(split(group_traj, group_traj$group), pairwise_dist, simplify = FALSE)

pairwise_dists <- do.call(rbind, pairwise_dists_list)

# remove those with unlikely distance
pairwise_dists <- pairwise_dists[pairwise_dists$distance > 0.1 &
                                   pairwise_dists$distance < 7, ]

Generate simulated group flight trajectories:

Code

## split into single individual trajectories
# split in a list
split_group_traj <- split(group_traj, group_traj$group)

# get single individual trajectores
indiv_traj_list <- lapply(split_group_traj, function(x) {
  x <- x[, sapply(x, function(x)
    sum(is.na(x))) < 10]
  
  # get number of individuals
  suppressWarnings(indv_num <-
                     unique(na.omit(as.numeric(
                       gsub(".*?([0-9]+).*", "\\1", names(x))
                     ))))
  
  indiv_trajs <- lapply(indv_num, function(y)
    x[, grep(y, names(x))])
  
  return(indiv_trajs)
})

indiv_traj_list <-
  unlist(indiv_traj_list, recursive = FALSE, use.names = TRUE)

# remove last character of the list names so all have the original group name
names(indiv_traj_list) <- gsub(".$", "", names(indiv_traj_list))

# get number of individuals per group
n_per_group <-
  sapply(unique(group_traj$group), USE.NAMES = FALSE, function(x) {
    # get those for that group
    X <- group_traj[group_traj$group == x, ]
    
    # count number of individuals (No NAs)
    n_indiv <- (sum(!sapply(X, anyNA)) - 3) / 3
    
    return(n_indiv)
  })

# mean number of individuals per group size in original data
mean_n_indiv <- floor(mean(table(n_per_group)))

group_sizes <- as.numeric(names(table(n_per_group)))

group_sizes <- rep(group_sizes, each = mean_n_indiv)

sim_dists <-
  lapply(1:30, function(x)
    sim_group_traj(group_sizes, seed = x))

sim_trajs_list <-
  lapply(1:30, function(x)
    sim_group_traj(group_sizes, seed = x, trajectories = TRUE))

# remove those with unlikely distance
sim_dists <- lapply(sim_dists, function(x) {
  x <- x[x$distance > 0.1 & x$distance < 7.81, ]
  return(x)
})

1.2 Descriptive stats

Number of trails: 34

Number of groups per group size:

Code

table(n_per_group)

n_per_group
2 3 4 5 6 
7 8 7 8 4

Mean distance between individuals in a group flight:

Code

sim_dists_df <- do.call(rbind, sim_dists)
# mean distance by group type
agg <- aggregate(distance ~ type, sim_dists_df, mean)
agg$sd <- aggregate(distance ~ type, sim_dists_df, sd)$distance

# mean distance by group type
agg2 <- aggregate(distance ~ type, pairwise_dists, mean)
agg2$sd <- aggregate(distance ~ type, pairwise_dists, sd)$distance

rbind(agg, agg2)

type	distance	sd
simulated	2.050082	0.5696759
artificial	2.005765	0.6803455
natural	1.670052	0.7256127

Closest distance between individuals in a group flight:

Code

sim_dists_df <- do.call(rbind, sim_dists)

# mean distance by group type
agg_min <- aggregate(distance ~ type + group, sim_dists_df, min)

agg <- aggregate(distance ~ type, agg_min, mean)
agg$sd <- aggregate(distance ~ type, agg_min, sd)$distance

# mean distance by group type
agg_min2 <- aggregate(distance ~ type + group, pairwise_dists, min)
agg2 <- aggregate(distance ~ type, agg_min2, mean)
agg2$sd <- aggregate(distance ~ type, agg_min2, sd)$distance

rbind(agg, agg2)

type	distance	sd
simulated	0.6592624	0.3312211
artificial	0.9213170	0.5873041
natural	0.7190676	0.3595199

1.3 Fit regression model

Model: \[ \begin{split} \text{mean pairwise distance} \sim \text{type} + \text{monotonic(group size)} + \\ (1 \mid \text{group}) + \text{arma(frame)} \end{split} \]

Specifications:

Data: combined natural, artificial and simulated group flight distance data
30 data sets were created with varying simulated group flight trajectories to acount for stochasticity in the simulation process
Add the group roost entry data to each simulated set and run a bayesian mixed effects model coordination on roost entry for each data set
The mean pairwise spatial distance between individuals at each frame was used as the response variable (modeled with a gaussian distribution), while group type (natural, artificial or simulated) was used as predictor
Group ID and group size were included as varying effects
The posterior distributions from the 30 models were combined into a single model fit for statistical inference
For each data set a model without the group type predictor was also fitted as a null model to further assess model fit
Weakly informative priors were used for all model parameters
Time frames were modeled using an AR(2) autocorrelation structure to account for temporal autocorrelation in the data

Code

# weakly informative priors
priors_flight_coord <- c(
  # Intercept
  prior(normal(0, 2), class = "Intercept"),
  
  # Fixed effects (including monotonic effects)
  prior(normal(0, 1), class = "b"),
  
  # Residual standard deviation
  prior(student_t(3, 0, 2.5), class = "sigma"),
  
  # AR(2) coefficients
  prior(normal(0, 0.5), class = "ar")
)


mods_flight_coord <- brm_multiple(
  formula = distance ~ type + mo(group.size.factor) + ar(p = 2, time = frame, gr = group),
  iter = iter,
  thin = 1,
  data = nat_sim_dist_list,
  family = gaussian(),
  silent = 2,
  prior = priors_flight_coord,
  chains = chains,
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)

custom_ppc(fit = mods_flight_coord[[1]], group = "type")

mods_flight_coord <- pblapply(mods_flight_coord, function(x)
  add_criterion(x, criterion = c("loo")))

saveRDS(
  mods_flight_coord,
  "./data/processed/model_list_flight_coordination_monotonic_weak_priors.RDS"
)

# weakly informative priors
priors_null_flight_coord <- c(
  # Intercept
  prior(normal(0, 2), class = "Intercept"),
  
  # Residual standard deviation
  prior(student_t(3, 0, 2.5), class = "sigma"),
  
  # AR(2) coefficients
  prior(normal(0, 0.5), class = "ar")
)

null_mods_flight_coord <- brm_multiple(
  formula = distance ~ 1 + ar(p = 2, time = frame, gr = group),
  iter = iter,
  thin = 1,
  data = nat_sim_dist_list,
  family = gaussian(),
  silent = 2,
  chains = chains,
  prior = priors_null_flight_coord,
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)

null_mods_flight_coord <- pblapply(null_mods_flight_coord, function(x)
  add_criterion(x, criterion = c("loo")))

saveRDS(
  null_mods_flight_coord,
  "./data/processed/nullmodel_list_flight_coordination_monotonic_weak_priors.RDS"
)

1.3.1 Results

1.3.1.1 Fitted model vs null model

Mean ELPD (expected log predictive density) and associated standard deviation between each model and its correspondent null model:

Code

mods_flight_coord <- readRDS("./data/processed/model_list_flight_coordination_monotonic_weak_priors.RDS")
null_mods_flight_coord <- readRDS(
  "./data/processed/nullmodel_list_flight_coordination_monotonic_weak_priors.RDS"
)

loo_diffs <-
  lapply(seq_along(mods_flight_coord), function(x)
    loo::loo_compare(mods_flight_coord[[x]], null_mods_flight_coord[[x]]))

loo_diff <- do.call(rbind, loo_diffs)

rows <- rownames(loo_diff)
loo_diff <- as.data.frame(loo_diff)

loo_diff$model <- rows

agg_loo <- aggregate(cbind(elpd_diff, se_diff) ~ model, loo_diff, mean)

agg_loo$model <- ifelse(grepl("null", agg_loo$model), "Null model", "Full model")

agg_loo

model	elpd_diff	se_diff
Full model	0.00000	0.000000
Null model	-22.11837	7.192911

1.3.1.2 Model fit

Code

# average model
avrg_call <-
  paste0("posterior_average(",
         paste0(paste0(
           "mods_flight_coord[[", 1:length(mods_flight_coord), "]]"
         ), collapse = ", "),
         ", weights = 'loo')")

average_model_draws <- eval(parse(text = avrg_call))

Loading required namespace: rstan

Code

brmsish:::draw_extended_summary(
  average_model_draws,
  highlight = TRUE,
  fill = fill_color,
  beta.prefix = c("^b_", "^bsp_mo")
)

	Estimate	l-95% CI	u-95% CI	Rhat	Bulk_ESS	Tail_ESS
b_Intercept	1.494	1.369	1.613	1	11265.36	13640.30
b_typeartificial	0.323	0.188	0.457	1	17220.92	14945.61
b_typesimulated	0.338	0.227	0.446	1	17826.58	14888.07
bsp_mogroup.size.factor	0.084	0.050	0.119	1	11719.31	14298.03

1.3.1.3 Contrasts

Code

contrasts <-
  draws_contrasts(
    draws = average_model_draws,
    predictor_name = "type",
    basal_level = "natural",
    fill_color = fill_color
  )

contrasts$contrasts_html

	est	l-95% CI	u-95% CI
artificial - natural	-0.323	-0.457	-0.188
simulated - natural	-0.338	-0.446	-0.227
simulated - artificial	-0.015	-0.148	0.119

Code

contrasts$plot + theme_classic()

Plot raw data:

Code

# add original data to simulated data and calculate proximity
nat_sim_dist_list <- lapply(sim_dists, function(x) {
  return(rbind(x, pairwise_dists))
})

# remove distances above 10 m
nat_sim_dist_list <- lapply(nat_sim_dist_list, function(x) {
  x <- x[x$distance < 10, ]
  x$type <-
    factor(x$type, levels = c("natural", "artificial", "simulated"))
  return(x)
})

dat_all <- dat <- nat_sim_dist_list[[1]]

dat_all$group.size <- "All sizes"

dat <- rbind(dat, dat_all)

dat$group.size.factor <-
  factor(dat$group.size, levels = c(1:10, "All sizes"))

n_grps <-
  aggregate(group ~ group.size.factor + type, dat, function(x)
    length(unique(x)))

n_grps$frames <-
  aggregate(group ~ group.size.factor + type, dat, function(x)
    length(x))$group


n_grps$indivs <-
  n_grps$group * as.numeric(as.character(n_grps$group.size.factor))


n_grps$group.size.factor <- factor(n_grps$group.size.factor)
n_grps$distance <-
  ifelse(n_grps$group.size.factor %in% 2:3, -1.2, -0.9)
n_grps$distance[1] <- -0.8

n_grps$indivs[n_grps$group.size.factor == "All sizes"] <-
  aggregate(indivs ~ type, n_grps, sum)$indivs

n_grps$n.labels <-
  paste0(n_grps$group, " / ", n_grps$indivs, " / ", n_grps$frames)
n_grps$n.labels[1] <-
  " n = 3 groups /\n 6 individuals /\n 149 frames"
n_grps$n.labels[6] <-
  " n = 18 groups /\n 74 individuals /\n 888 frames"
n_grps$distance[6] <- -0.3
n_grps$distance[c(11, 17)] <- -0.4

custom_labels <- unique(nat_sim_dist_list[[1]]$group.size)
custom_labels <-
  paste(c("Group size = ", rep("", 5)), custom_labels)
names(custom_labels) <- unique(nat_sim_dist_list[[1]]$group.size)



custom_labels <- unique(dat$group.size)
custom_labels <-
  paste(c("Group size = ", rep("", 5)), custom_labels)
names(custom_labels) <- unique(dat$group.size)

# mean distance by group type
agg_gp <-
  aggregate(distance ~ type + group + group.size.factor, dat, mean)

agg_gp$group.size.factor <- factor(agg_gp$group.size.factor)

# set effect contrasts to annotate them on the graph
annots <- as.data.frame((contrasts$contrasts_df))
annots$annotation <- paste0(
  "\u03B2: ",
  round(annots$est, 2),
  "m ~ (",
  round(ifelse(annots$`l-95% CI` < annots$`u-95% CI`, annots$`l-95% CI`, annots$`u-95% CI`), 2),
  " – ",
  round(ifelse(annots$`u-95% CI` > annots$`l-95% CI`, annots$`u-95% CI`, annots$`l-95% CI`), 2),
  "m)"
)


# raincloud plot:
gg_all <- ggplot(dat[dat$group.size == "All sizes", ], aes(
  y = distance,
  x = type,
  color = type,
  fill = type
)) +
  geom_signif(
    y_position = c(4.35, 4.75),
    xmin = c(1, 1),
    xmax = c(2, 3),
    annotation = annots[c("artificial - natural", "simulated - natural"), "annotation"],
    parse = FALSE,
    tip_length = 0.02,
    textsize = 3,
    size = 0.5,
    color = "gray10",
    vjust = 0.2
  ) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(
    width = .15, # remove outliers
    outlier.shape = NA) +
  geom_boxplot(
    fill = adjustcolor("orange", alpha.f = 0.1),
    data = agg_gp[!agg_gp$group.size %in% 2:5, ],
    aes(y = distance, x = type),
    color = adjustcolor("orange", alpha.f = 0.6),
    width = .1,
    # remove outliers
    outlier.shape = NA
  ) +
  # add justified jitter from the {gghalves} package
  geom_half_point(
    # color = fill_color,
    # draw jitter on the left
    side = "l",
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  scale_color_manual(values = graph_cols[2:4]) +
  scale_fill_manual(values = graph_cols_transp[2:4]) +
  geom_half_point(
    data = agg_gp[agg_gp$group.size != "All sizes", ],
    aes(y = distance, x = type),
    color = "orange",
    # draw jitter on the left
    side = "l",
    pch = 20,
    size = 4,
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  ylim(c(-0.4, 4.8)) +
  scale_x_discrete(labels = c(
    "natural" = "Natural",
    "artificial" = "Artificial",
    "simulated" = "Simulated"
  )) +
  theme(legend.position = "none") +
  labs(x = "Flight type", y = "Distance (m)") +
  facet_wrap( ~ group.size,
              ncol = 1,
              labeller = labeller(group.size = c("All sizes" = "All group sizes"))) +
  geom_text(
    lineheight = 0.8,
    data = n_grps[!n_grps$group.size.factor %in% 2:6, ],
    aes(y = distance, x = type, label = n.labels),
    color = "gray10",
    nudge_x = 0,
    size = 3
  )


# raincloud plot:
gg_gs <- ggplot(dat[dat$group.size %in% 2:5, ], aes(
  y = distance,
  x = type,
  color = type,
  fill = type
)) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.6,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  # add justified jitter from the {gghalves} package
  geom_half_point(
    # color = fill_color,
    # draw jitter on the left
    side = "l",
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .3,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  scale_color_manual(values = graph_cols[2:4]) +
  scale_fill_manual(values = graph_cols_transp[2:4]) +
  geom_half_point(
    data = agg_gp[agg_gp$group.size %in% 2:5, ],
    aes(y = distance, x = type),
    color = "orange",
    # draw jitter on the left
    side = "l",
    pch = 20,
    size = 4,
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15, # remove outliers
    outlier.shape = NA) +
  geom_boxplot(
    fill = adjustcolor("orange", alpha.f = 0.1),
    data = agg_gp[agg_gp$group.size %in% 2:5, ],
    aes(y = distance, x = type),
    color = adjustcolor("orange", alpha.f = 0.6),
    width = .1,
    # remove outliers
    outlier.shape = NA
  ) +
  ylim(c(-1.2, 4.8)) +
  scale_x_discrete(labels = c(
    "natural" = "Natural",
    "artificial" = "Artificial",
    "simulated" = "Simulated"
  )) +
  theme(legend.position = "none") +
  facet_wrap(
    ~ group.size.factor,
    ncol = 2,
    labeller = labeller(group.size.factor = custom_labels)
  ) +
  labs(x = "Flight type", y = "Pairwise flight distance (m)") +
  geom_text(
    lineheight = 0.8,
    data = n_grps[n_grps$group.size.factor %in% 2:5, ],
    aes(y = distance, x = type, label = n.labels),
    color = "gray10",
    nudge_x = 0,
    size = 2.3
  )

# gg_gs

pg <-
  plot_grid(
    gg_gs + theme_classic(base_size = 15) + theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.title.x = element_blank()
    ),
    gg_all + theme_classic(base_size = 15) + theme(legend.position = "none", axis.title.y = element_blank()) + labs(x = ""),
    ncol = 2
  )



# Add a common x-axis label
pg <- ggdraw(pg) +
  draw_label(
    "Group type",
    x = 0.51,
    y = 0.02,
    vjust = 0,
    size = 15
  )


hedFont <- "Arial"
pg <- pg +
  theme(plot.title = element_text(
    size = 20,
    family = hedFont,
    face = "bold"
  ))

pg

Code

if (!isTRUE(getOption('knitr.in.progress'))) {
  ggsave(
    "./output/flight_coordination_5_panels.png",
    pg,
    grDevices::png,
    width = 9,
    height = 4,
    dpi = 300
  )
}

1.3.1.4 Posterior predictive checks

Code

custom_ppc(readRDS("./data/processed/model_list_flight_coordination.RDS")[[1]],
           group = "type")

Takeaways

Flight coordination is higher in natural groups than in artificial and simulated groups
Flight coordination is similar in artificial and simulated groups
Flight distance between individuals increases with group size

2 Roost entry synchrony

2.1 Prepare data

Read data:

Code

dat <-
  as.data.frame(read_excel(path = "./data/raw/roost_entry_metadata.xlsx"))

Code

dat$entry.time <- as.numeric(dat$`Tiempo real`)

Warning: NAs introduced by coercion

Code

# remove missing data
dat <- dat[!is.na(dat$entry.time), ]

# rename treatment levels
dat$type <- dat$`Tipo grupo`

dat$type[dat$type == "Real"] <- "Natural"
dat$type[dat$type == "Mixto"] <- "Artificial"
dat$group <- dat$Grupo

Calculate time lapse between successive individuals when entering the roost for each group:

Code

group_dat <-
  dat[!is.infinite(dat$entry.time) &
        !is.na(dat$entry.time) & dat$type != "Individual", ]

# get time difference  between entries
group_dat_l <- lapply(unique(group_dat$group), function(x) {
  # print(x)
  X <- group_dat[group_dat$group == x, ]
  X <- X[!is.na(X$entry.time), ]
  X <- X[order(X$entry.time), ]
  # X$entry.time.diff <- X$entry.time - min(X$entry.time)
  X$entry.time.diff <- c(NA, X$entry.time[-1] - X$entry.time[-nrow(X)])
  X <- X[!is.na(X$entry.time.diff), ]
  
  
  # add 1 milisecond to 0
  X$entry.time.diff[X$entry.time.diff == 0] <- NA
  
  X$group.size <- if (nrow(X) > 0)
    nrow(X) - 1
  else
    vector()
  return(X)
})

group_dat <- do.call(rbind, group_dat_l)

2.2 Simulating entry time difference

Simulated 30 data sets, each one with a number of simulated groups for each group size (2 to 5 individuals) similar to those in the natural group data. This approach allowed us to propagate uncertainty from random group simulations into our statistical inference.

Code

group_dat <-
  dat[!is.infinite(dat$entry.time) &
        !is.na(dat$entry.time) & dat$type != "Individual", ]


# get difference to first entry
group_dat_l <- lapply(unique(group_dat$group), function(x) {
  # print(x)
  X <- group_dat[group_dat$group == x, ]
  X <- X[!is.na(X$entry.time), ]
  X <- X[order(X$entry.time), ]
  X$entry.time.diff <- c(NA, X$entry.time[-1] - X$entry.time[-nrow(X)])
  X <- X[!is.na(X$entry.time.diff), ]
  
  X$group.size <- if (nrow(X) > 0)
    nrow(X) + 1
  else
    vector()
  return(X)
})

group_dat <- do.call(rbind, group_dat_l)


group_dat$type <-
  factor(group_dat$type, levels = c("Artificial", "Natural", "Simulated"))

# make random groups from individual flights
indiv_dat <-
  dat[!is.infinite(dat$entry.time) &
        !is.na(dat$entry.time) & dat$type == "Individual", ]

indivs <- unique(indiv_dat$Individuo)

group_sizes <- group_dat$group.size[!duplicated(group_dat$Video)]

# use only group sizes 2:5
group_sizes <- group_sizes[group_sizes <= 5]

table(group_sizes)

group_sizes
2 3 4 5 
6 4 9 8

Code

# simulate group entries
sim_groups_l <- pblapply(1:30, cl = 10, function(x) {
  # randomize order of distribution of individuals per experiment
  set.seed(x)
  g_size <- sample((group_sizes), length(group_sizes) / 2)

  names(g_size) <- paste("sim", x, 1:length(g_size), sep = "-")
  
  set.seed(x)
  indivs_split <- lapply(g_size, function(w)
    sample(indivs, w))
  
  
  sub_sim_groups_l <- lapply(1:length(indivs_split), function(y) {
    sim_group <- indiv_dat[indiv_dat$Individuo %in% indivs_split[[y]], ]
    sim_group$group <- names(g_size)[y]
    
    sim_group <- sim_group[!is.na(sim_group$entry.time), ]
    sim_group <- sim_group[order(sim_group$entry.time), ]
    sim_group$entry.time.diff <- c(NA, sim_group$entry.time[-1] - sim_group$entry.time[-nrow(sim_group)])
    sim_group <- sim_group[!is.na(sim_group$entry.time.diff), ]
    
    
    sim_group$entry.time.diff[sim_group$entry.time.diff > 300] <- 300
    sim_group$group.size <- nrow(sim_group) + 1
    sim_group$type <- factor("Simulated")
    
    return(sim_group)
  })
  
  sub_sim_groups <- do.call(rbind, sub_sim_groups_l)
  
  
  # bind excluding first individual
  out <-
    rbind(sub_sim_groups, group_dat[group_dat$group.size %in% 2:5 &
                                      group_dat$entry.time.diff > 0.001, ])
  
  return(out)
})

sim_group_n <-
  sapply(sim_groups_l, function(x)
    length(unique(x$group[x$type == "Simulated"])))

all(sim_group_n == 4)

[1] FALSE

Code

sim_group_dat <-
  do.call(rbind, lapply(sim_groups_l, function(x)
    x[x$type == "Simulated", ]))

sim_group_dat <- rbind(sim_group_dat, group_dat)


# also make group.size.factor
sim_groups_l <- lapply(sim_groups_l, function(x) {
  x$type <- factor(x$type, levels = c("Natural", "Artificial", "Simulated"))
  x$group.size.factor <- factor(x$group.size)
  return(x)
})

2.3 Descriptive stats

Number of individuals per type:

Code

table(dat$type)


Artificial Individual    Natural 
        94         34        124

Number of individuals per type removing missing data (NAs and Inf):

Code

(table(dat$type[!is.infinite(dat$entry.time) & !is.na(dat$entry.time)]))


Artificial Individual    Natural 
        65         17         95

Individuals per experiment:

Code

ind_per_video <- table(dat$Video[dat$type != "Individual"])

table(ind_per_video)

ind_per_video
 2  3  4  5  6  7 
 8 16 14 17  1  1

Mean entry time per group type:

Code

out <- lapply(sim_groups_l, function(x){
  
  x <- x[order(x$group, x$entry.time),]
  
  out2 <- lapply(unique(x$group), function(g){
    Y <- x[x$group == g, ]
    
    if (nrow(Y) > 2){
    out_df <- data.frame(
      entry.time = c(as.numeric(Y$`Tiempo real`)[1] - Y$entry.time.diff[1], Y$entry.time),
      time.diff = c(NA, Y$entry.time.diff),
      type = Y$type[1],
      group = g
    )
    out_df$entry <- seq_len(nrow(out_df))
    return(out_df[1:3, ])
    } else {
      return(NULL)
    }
     })
  
  do.call(rbind, out2)
  })

mean_entry_time <- aggregate(cbind(entry.time, time.diff) ~ entry + type, do.call(rbind, out), mean, na.action = na.pass)

mean_entry_time

entry	type	entry.time	time.diff
1	Natural	32.25000	NA
2	Natural	45.75000	13.50000
3	Natural	51.25000	5.50000
1	Artificial	33.20000	NA
2	Artificial	95.00000	61.80000
3	Artificial	103.00000	8.00000
1	Simulated	28.16923	NA
2	Simulated	73.65769	45.48846
3	Simulated	124.94615	51.28846

2.4 Fit regression model

Model:

\[ \begin{split} \text{log(entry time difference + 1)} \sim \text{type} + \\ \text{monotonic(group size)} + (\text{1} \mid \text{group}) \end{split} \]

Specifications:

Data: combined natural, artificial and simulated group flight distance data
30 data sets were created with varying simulated group flight trajectories to account for stochasticity in the simulation process
The posterior distributions from the 30 models were combined into a single model fit for statistical inference
The difference in time between when entering the roost between consecutive individuals was used as the response variable (after log transformation, modeled with a normal distribution), while group type (natural, artificial or simulated) was used as predictor
Group ID was included as varying effect
For each data set a model without the group type predictor was also fitted as a null model to further assess model fit

Code

priors_entry <- c(
  # Intercept: log scale, centered, moderately wide
  prior(normal(0, 2), class = "Intercept"),

  # Fixed effects (type + monotonic effect)
  prior(normal(0, 1), class = "b"),

  # Random intercept SD (group-level variation)
  prior(student_t(3, 0, 1), class = "sd"),

  # Residual SD
  prior(student_t(3, 0, 1), class = "sigma")
)

entry_mod_list <- brm_multiple(
  formula = log(entry.time.diff + 1) ~ type + mo(group.size) + (1 |
                                                                  group),
  iter = iter,
  thin = 1,
  data = sim_groups_l,
  family = gaussian(),
  silent = 2,
  chains = chains,
  prior = priors_entry,
  threads = threading(2),
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)

custom_ppc(fit = entry_mod_list[[1]], group = "type")

entry_mod_list <- pblapply(entry_mod_list, function(x)
  add_criterion(x, criterion = c("loo")))

saveRDS(
  entry_mod_list,
  "./data/processed/model_list_roost_entry_time_regression_monotonic_weak_priors.RDS"
)


priors_entry_null <- c(
  # Intercept: log scale, centered, moderately wide
  prior(normal(0, 2), class = "Intercept"),

  # Random intercept SD (group-level variation)
  prior(student_t(3, 0, 1), class = "sd"),

  # Residual SD
  prior(student_t(3, 0, 1), class = "sigma")
)

# run null model
null_entry_mod_list <- brm_multiple(
  formula = log(entry.time.diff + 1) ~ 1 + (1 | group),
  iter = iter,
  thin = 1,
  data = sim_groups_l,
  family = gaussian(),
  silent = 2,
  chains = chains,
  prior = priors_entry_null,
  threads = threading(2),
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)

null_entry_mod_list <- pblapply(null_entry_mod_list, function(x)
  add_criterion(x, criterion = c("loo")))


saveRDS(
  null_entry_mod_list,
  "./data/processed/null_model_list_roost_entry_time_regression_monotonic_weak_priors.RDS"
)

2.4.1 Results

2.4.1.1 Fitted model vs null model

Code

model_list_roost_entry_time_regression <-
  readRDS("./data/processed/model_list_roost_entry_time_regression_monotonic_weak_priors.RDS")

null_model_list_roost_entry_time_regression <-
  readRDS("./data/processed/null_model_list_roost_entry_time_regression_monotonic_weak_priors.RDS")

loo_diffs <-
  lapply(seq_along(model_list_roost_entry_time_regression), function(x)
    loo::loo_compare(
      model_list_roost_entry_time_regression[[x]],
      null_model_list_roost_entry_time_regression[[x]]
    ))

loo_diff <- do.call(rbind, loo_diffs)

rows <- rownames(loo_diff)
loo_diff <- as.data.frame(loo_diff)

loo_diff$model <- rows

agg_loo <- aggregate(cbind(elpd_diff, se_diff) ~ model, loo_diff, mean)

agg_loo$model <- ifelse(grepl("null", agg_loo$model), "Null model", "Full model")

agg_loo

model	elpd_diff	se_diff
Full model	0.00000	0.000000
Null model	-13.84687	4.155004

2.4.1.2 Model fit

Code

# average model
avrg_call <-
  paste0("posterior_average(",
         paste0(
           paste0(
             "model_list_roost_entry_time_regression[[",
             1:length(model_list_roost_entry_time_regression),
             "]]"
           ),
           collapse = ", "
         ),
         ", weights = 'loo')")

average_model_draws <- eval(parse(text = avrg_call))

brmsish:::draw_extended_summary(
  average_model_draws,
  highlight = TRUE,
  fill = fill_color,
  beta.prefix = c("^b_", "^bsp_mo")
)

	Estimate	l-95% CI	u-95% CI	Rhat	Bulk_ESS	Tail_ESS
b_Intercept	2.678	1.932	3.430	1	15706.67	14872.42
b_typeArtificial	0.838	0.272	1.398	1	24313.53	14962.84
b_typeSimulated	1.536	0.952	2.089	1	21844.43	14856.31
bsp_mogroup.size	-0.305	-0.551	-0.045	1	15849.90	14577.40

2.4.1.3 Contrasts

Raw estimates:

Code

contrasts <-
  draws_contrasts(
    average_model_draws,
    predictor_name = "type",
    basal_level = "Natural",
    fill_color = fill_color
  )

contrasts$contrasts_html

	est	l-95% CI	u-95% CI
Artificial - Natural	-0.839	-1.398	-0.272
Simulated - Natural	-1.532	-2.089	-0.952
Simulated - Artificial	-0.693	-1.266	-0.110

Code

contrasts$plot + theme_classic()

Converted into multiplicative differences:

Code

# set effect contrasts to annotate them on the graph
annots <- as.data.frame((contrasts$contrasts_df))

# convert values into percentage faster
annots$est <- exp(annots$est)
annots$`u-95% CI` <- exp(annots$`u-95% CI`)
annots$`l-95% CI` <- exp(annots$`l-95% CI`)

annots

	est	l-95% CI	u-95% CI
Artificial - Natural	0.4321493	0.2471997	0.7617413
Simulated - Natural	0.2160992	0.1238605	0.3860395
Simulated - Artificial	0.5000568	0.2818553	0.8955846

Raw averages per group type:

Code

avgs <- do.call(rbind, lapply(sim_groups_l, function(x) aggregate(entry.time.diff ~ type,data =  x, mean)))

aggregate(entry.time.diff ~ type, avgs, mean)

type	entry.time.diff
Natural	12.40000
Artificial	40.00000
Simulated	54.31874

2.4.1.4 Plot raw data

All group sizes combined:

Code

# select colors
cols <- graph_cols[2:4]
cols_transp <- graph_cols_transp[2:4]

names(cols) <- names(cols_transp) <- c("Natural", "Artificial", "Simulated")

# raincloud plot:
gg_entry <- ggplot(sim_groups_l[[1]],
                   aes(
                     y = entry.time.diff,
                     x = type,
                     color = type,
                     fill = type
                   )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15, # remove outliers
    outlier.shape = NA) +
  # add justified jitter from the {gghalves} package
  geom_half_point(
    # color = fill_color,
    # draw jitter on the left
    side = "l",
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  scale_color_manual(values = cols) +
  scale_fill_manual(values = cols_transp) +
  scale_x_discrete(labels = c(
    "natural" = "Natural",
    "artificial" = "Artificial",
    "simulated" = "Simulated"
  )) +
  theme(legend.position = "none") +
  labs(x = "Group type", y = "Time difference (s)")

agg_grp_time <- aggregate(entry.time.diff ~ type + group, sim_groups_l[[1]], mean)

n_grps <-
  aggregate(group ~ type + group.size, sim_groups_l[[1]], function(x)
    length(unique(x)))

n_grps$indivs <-
  n_grps$group * n_grps$group.size

agg_grps <- aggregate(group ~ type, n_grps, sum)

agg_grps$indivs <- aggregate(indivs ~ type, n_grps, sum)$indivs

agg_grps$n.labels <-
  paste0(agg_grps$group, " / ", agg_grps$indivs)
agg_grps$n.labels[1] <-
  " n = 13 groups /\n 50 individuals"
agg_grps$distance <- -15


annots$annotation <- paste0(
  "\u03B2: ",
  round(annots$est, 2),
  "x ~ (",
  round(ifelse(annots$`l-95% CI` < annots$`u-95% CI`, annots$`l-95% CI`, annots$`u-95% CI`), 2),
  " – ",
  round(ifelse(annots$`u-95% CI` > annots$`l-95% CI`, annots$`u-95% CI`, annots$`l-95% CI`), 2),
  "x)"
)

gg_entry <- gg_entry + ylim(c(-20, 210)) +
  geom_signif(
    y_position = c(170, 190, 210),
    xmin = c(1, 1, 2),
    xmax = c(3, 2, 3),
    annotation = annots[c("Simulated - Natural", "Artificial - Natural", "Simulated - Artificial"), "annotation"],
    parse = FALSE,
    tip_length = 0.02,
    textsize = 3,
    size = 0.5,
    color = "gray10",
    vjust = 0.2
  ) +
  geom_half_point(
    data = agg_grp_time,
    aes(y = entry.time.diff, x = type),
    color = "orange",
    # draw jitter on the left
    side = "l",
    pch = 20,
    size = 4,
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  geom_boxplot(
    fill = adjustcolor("orange", alpha.f = 0.1),
    data = agg_grp_time,
    aes(y = entry.time.diff, x = type),
    color = adjustcolor("orange", alpha.f = 0.6),
    width = .1,
    # remove outliers
    outlier.shape = NA
  ) +
  geom_text(
    lineheight = 0.8,
    data = agg_grps,
    aes(y = distance, x = type, label = n.labels),
    color = "gray10",
    nudge_x = 0,
    size = 4
  )  + theme(legend.position = "none")

gg_entry + theme_classic(base_size = 20) + theme(legend.position = "none")

Code

if (!isTRUE(getOption('knitr.in.progress'))) {
 ggsave(
  "./output/roost_entry_coordination_1_panel.png",
  gg_entry + theme_classic(base_size = 12) + theme(legend.position = "none"),
  grDevices::png,
  width = 4.5,
  height = 3.5,
  dpi = 300
)
}

2.4.1.5 Posterior predictive checks

Code

custom_ppc(fit = model_list_roost_entry_time_regression[[1]], group = "type")

Takeaways

Natural groups enter more coordinated than simulated and artificial groups
Artificial groups enter more coordinated than simulated groups
Entry time difference decreases with group size

3 Vocal coordination

Read data:

Code

clls <- readRDS(
  "./data/processed/curated_extended_selection_table_inquiry_calls_2020_&_2021.RDS"
)

sub_clls <- clls[!duplicated(clls$org.sound.files), ]
sub_clls$file.duration <- sub_clls$sound.file.samples / (sub_clls$sample.rate * 1000)


metadat <- as.data.frame(read_excel("./data/raw/group_flight_metadata.xlsx"))
names(metadat) <- gsub(" ", ".", names(metadat))
metadat$year <- substr(metadat$Día, 0, 4)

metadat$year[is.na(metadat$year)] <- "2021"
metadat <- metadat[metadat$tipo.de.video != "calibracion de video", ]

# audio solamente para identificacion de sonidos. TASA NO
metadat <- metadat[metadat$Audio != "60 y 61", ]

metadat$year.audio <- paste(metadat$year, metadat$Audio, sep = "-")

caps <- as.data.frame(read_excel("./data/raw/capture_metadata.xlsx", sheet = "Capturas"))

# read acoustic parameter data
acous_param_l <- readRDS(
  "./data/processed/acoustic_parameters_all_groups_specific_warbler_acoustic_measurements_curated_data_2020_&_2021.RDS"
)

# read as RDS
agg_pred <- readRDS("./data/processed/predicted_individual_in_group_flights_2020_&_2021.RDS")

# read diagnostics
diagnostics <- readRDS("./data/processed/random_forests_diagnostics_solo_flight.RDS")

3.1 Call rate

3.1.1 Individual in solo flight vs overall group flight

Code

call_rate_by_group <-
  readRDS("./data/processed/call_rate_by_group.RDS")

# remove those in which only one individual called
call_id_seq <- read.csv("./data/processed/group_call_individual_id_sequence.csv")

call_id_seq <- call_id_seq[order(call_id_seq$event, call_id_seq$start), ]

call_id_seq_list <- split(call_id_seq, call_id_seq$event)

# get diversity and entropy
unique_callers_list <- pblapply(call_id_seq_list, function(x) {
  x <- x[order(x$start), ]
  id_seq <- x$indiv
  id_seq <- as.character(as.numeric(factor(id_seq)))
  unique.callers <- length(unique(id_seq))
  
  out <- data.frame(group = x$group[1], unique.callers = unique.callers)
  return(out)
})

unique_callers_df <- do_call(rbind, unique_callers_list)

call_rate_by_group <- call_rate_by_group[!call_rate_by_group$group %in% unique_callers_df$group[unique_callers_df$unique.callers == 1], ]

call_rate_by_group$type <-
  ifelse(
    call_rate_by_group$type == "solo",
    "solo",
    paste(call_rate_by_group$experiment, call_rate_by_group$type, sep = "-")
  )

call_rate_by_group$type <-
  gsub("mixed.group", "artificial.group", call_rate_by_group$type)
call_rate_by_group$type <-
  gsub("regular.group", "natural.group", call_rate_by_group$type)

call_rate_by_group$type <-
  factor(call_rate_by_group$type,
         levels = c("solo", "natural.group", "artificial.group"))

# mean centering group size
call_rate_by_group$group.size.f <-
  factor(call_rate_by_group$group.size, ordered = TRUE)

3.1.1.1 Fit regression model

Model:

\[ \begin{split} \text{call rate} \sim \text{type} + (1 \mid \text{group}) + \text{monotonic(group size)} + \\ (\text{type} \mid \text{group}) \end{split} \]

Specifications:

Call rate was used as response variable, modeled with a negative binomial distribution
Group type (individual in solo flight, natural group or artificial group) was used as predictor
Group ID was included as varying effect (intercepts and slopes)
Group size was modeled as a monotonic effect to account for its ordinal nature

Code

# weakly informative priors
priors_calls <- c(
  # Intercept: log call rate
  prior(normal(0, 1.5), class = "Intercept"),

  # Fixed effects: type + monotonic group size
  prior(normal(0, 1), class = "b"),

  # Group-level SDs (intercepts + slopes)
  prior(student_t(3, 0, 1), class = "sd"),

  # Correlation among group-level effects
  prior(lkj(2), class = "cor"),

  # Negative binomial overdispersion parameter (shape)
  prior(exponential(1), class = "shape")
)

mod <- brm(
  formula = calls |
    resp_rate(flight.time) ~ type + mo(group.size.f) + (type | group),
  iter = iter,
  thin = 1,
  data = call_rate_by_group,
  family = negbinomial(),
  silent = 2,
  chains = chains,
  cores = chains, 
  prior = priors_calls,
  save_pars = save_pars(all = TRUE),
  control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/individual_vs_group_call_rate_group_size_monot_weak_priors",
  file_refit = "always"
)

mod <- add_criterion(mod, criterion = c("loo"))

# weakly informative priors
priors_calls_null <- c(
  # Intercept: log call rate
  prior(normal(0, 1.5), class = "Intercept"),

  # Group-level SDs (intercepts + slopes)
  prior(student_t(3, 0, 1), class = "sd"),

  # Negative binomial overdispersion parameter (shape)
  prior(exponential(1), class = "shape")
)

null_mod <- brm(
  formula = calls | resp_rate(flight.time) ~ 1 + (1 | group),
  iter = iter,
  thin = 1,
  data = call_rate_by_group,
  family = negbinomial(),
  silent = 2,
  chains = chains,
  cores = chains, 
  prior = priors_calls_null,
  save_pars = save_pars(all = TRUE),
  control = list(adapt_delta = 0.99, max_treedepth = 15),
  file = "./data/processed/null_mod_individual_vs_group_call_rate_group_size_weak_priors",
  file_refit = "always"
)

null_mod <- add_criterion(null_mod, criterion = c("loo"))

3.1.1.2 Results

3.1.1.3 Fitted model vs null model

Code

mod <- readRDS("./data/processed/individual_vs_group_call_rate_group_size_monot_weak_priors.rds")

null_mod <- readRDS("./data/processed/null_mod_individual_vs_group_call_rate_group_size_weak_priors.rds")

loo_diff <- loo::loo_compare(mod, null_mod)

rows <- rownames(loo_diff)
loo_diff <- as.data.frame(loo_diff)

loo_diff$model <- rows

agg_loo <- aggregate(cbind(elpd_diff, se_diff) ~ model, loo_diff, mean)

agg_loo$model <- ifelse(grepl("null", agg_loo$model), "Null model", "Full model")

agg_loo

model	elpd_diff	se_diff
Full model	0.00000	0.00000
Null model	-25.65083	10.47626

3.1.1.3.1 Model fit

Code

extended_summary(
  fit = mod,
  gsub.pattern = "type",
  gsub.replacement = "solo_vs_",
  remove.intercepts = TRUE,
  highlight = TRUE,
  print.name = FALSE,
  trace.palette = viridis::mako,
  fill = fill_color,
  beta.prefix = c("^b_", "^bsp_mo")
)

	priors	formula	iterations	chains	thinning	warmup	diverg_transitions	rhats > 1.05	min_bulk_ESS	min_tail_ESS	seed
1	b-normal(0, 1) Intercept-normal(0, 1.5) L-lkj_corr_cholesky(2) sd-student_t(3, 0, 1) shape-exponential(1) simo-dirichlet(1)	calls \| resp_rate(flight.time) ~ type + mo(group.size.f) + (type \| group)	10000	4	1	5000	0 (0%)	0	9004.415	13384.81	770595480

	Estimate	l-95% CI	u-95% CI	Rhat	Bulk_ESS	Tail_ESS
b_solo_vs_natural.group	0.230	-0.033	0.487	1	20972.003	13888.01
b_solo_vs_artificial.group	-0.384	-0.707	-0.063	1	15876.998	14341.27
bsp_mogroup.size.f	0.007	-0.090	0.098	1	9004.415	13384.81

3.1.1.3.2 Contrasts

Raw estimates:

Code

# contrasts
contrasts <-contrasts(
  fit = mod,
  predictor = "type",
  n.posterior = 2000,
  level.sep = " VS ",
  html.table = FALSE,
  plot = TRUE,
  highlight = TRUE,
  fill = fill_color
)

Code

coefs <- brmsish:::html_format_coef_table(contrasts, highlight = TRUE, fill = fill_color)

coefs

	Contrasts	Estimate	Est.Error	l-95% CI	u-95% CI
1	solo VS natural.group	-0.230	0.132	0.033	-0.487
2	solo VS artificial.group	0.384	0.164	0.707	0.063
3	natural.group VS artificial.group	0.614	0.208	0.210	1.025

Converted into times of fewer calls / min :

Code

# set effect contrasts to annotate them on the graph
annots <- as.data.frame((contrasts))

# convert values into percentage of more calls
  annots$Estimate <- exp(annots$Estimate)
annots$`u-95% CI` <- exp(annots$`u-95% CI`)
annots$`l-95% CI` <- exp(annots$`l-95% CI`)

rownames(annots) <- annots$Contrasts
annots$Contrasts <- NULL

annots

	Estimate	Est.Error	l-95% CI	u-95% CI
solo VS natural.group	0.794808	0.1315346	1.033551	0.614323
solo VS artificial.group	1.468618	0.1636883	2.027805	1.065139
natural.group VS artificial.group	1.847765	0.2080344	1.233546	2.787613

Artificial vs natural:

Code

1 / annots[1, ]

	Estimate	Est.Error	l-95% CI	u-95% CI
solo VS natural.group	1.258166	7.602565	0.9675379	1.627808

3.1.1.3.3 Plot raw data

All group sizes combined:

Code

# set colors
cols <- graph_cols[1:3]
cols_transp <- graph_cols_transp[1:3]

names(cols) <- names(cols_transp) <- c("solo", "natural.group", "artificial.group")

# raincloud plot:
ggrate_indv_grp <- ggplot(call_rate_by_group, aes(
  y = rate,
  x = type,
  color = type,
  fill = type
)) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15, # remove outliers
    outlier.shape = NA) +
  # add justified jitter from the {gghalves} package
  geom_half_point(
    # color = fill_color,
    # draw jitter on the left
    side = "l",
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  scale_color_manual(values = cols) +
  scale_fill_manual(values = cols_transp) +
  ylim(c(-0.1, 60)) +
  scale_x_discrete(
    labels = c(
      "solo" = "Solo",
      "natural.group" = "Natural group",
      "artificial.group" = "Artificial group"
    )
  ) +
  theme(legend.position = "none") +
  labs(x = "Flight type", y = "Calling rate (calls / min)")

n_grps <-
  aggregate(group ~ type, call_rate_by_group, function(x)
    length(unique(x)))

n_grps$group <- sapply(n_grps$type, function(x)
  length(unique(call_rate_by_group$group[call_rate_by_group$type == x &
                                           call_rate_by_group$indiv == "group"])))

n_grps$group[1] <- length(unique(call_rate_by_group$group[call_rate_by_group$type == "solo" &
                                                            call_rate_by_group$experiment == "regular"]))

n_grps$indivs <- 0


n_grps$indivs[1] <- length(unique(call_rate_by_group$indiv[call_rate_by_group$type == "solo"]))

n_grps$distance <- -4

n_grps$n.labels <-
  paste0(n_grps$group, " / ", n_grps$indivs)
n_grps$n.labels <- c("n = 131 individuals\nfrom 29 groups", "32 groups", "37 groups")

annots$annotation <- paste0(
  "beta: ",
  round(annots$Estimate, 1),
  "*'x' ~ (",
  round(ifelse(annots$`l-95% CI` < annots$`u-95% CI`, annots$`l-95% CI`, annots$`u-95% CI`), 1),
  " - ",
  round(ifelse(annots$`u-95% CI` > annots$`l-95% CI`, annots$`u-95% CI`, annots$`l-95% CI`), 1),
  "*'x')"
)



ggrate_indv_grp_pimp <- ggrate_indv_grp +
  scale_x_discrete(
    labels = c(
      "solo" = "Single individual",
      "natural.group" = "Natural group",
      "artificial.group" = "Artificial group"
    )
  ) +
  geom_signif(
    y_position = c(61, 68),
    xmin = c(1, 2),
    xmax = c(3, 3),
    annotation = annots[c("solo VS artificial.group", "natural.group VS artificial.group"), "annotation"],
    parse = TRUE,
    tip_length = 0.02,
    textsize = 3,
    size = 0.5,
    color = "gray10",
    vjust = 0.2
  ) +
  ylim(c(-5, 68)) +
  theme(axis.title.x = element_blank()) +
  geom_text(
    lineheight = 0.8,
    data = n_grps,
    aes(y = distance, x = type, label = n.labels),
    color = "gray10",
    nudge_x = 0,
    size = 3
  )

ggrate_indv_grp_pimp

3.1.1.3.4 Posterior predictive checks

Code

custom_ppc(fit = mod, group = "type")

Takeaways

No difference in call rate between solo individuals and natural groups
Artificial groups produce lower call rates than solo individuals and natural groups
No effect of group size

3.1.2 Individual in solo flight vs individuals in group flight

Code

call_rate_by_group <- readRDS("./data/processed/call_rate_by_group.RDS")

# remove those in which only one individual called
call_id_seq <- read.csv("./data/processed/group_call_individual_id_sequence.csv")

call_id_seq <- call_id_seq[order(call_id_seq$event, call_id_seq$start), ]

call_id_seq_list <- split(call_id_seq, call_id_seq$event)

# get diversity and entropy
unique_callers_list <- pblapply(call_id_seq_list, function(x) {
  x <- x[order(x$start), ]
  id_seq <- x$indiv
  id_seq <- as.character(as.numeric(factor(id_seq)))
  unique.callers <- length(unique(id_seq))
  
  out <- data.frame(group = x$group[1], unique.callers = unique.callers)
  return(out)
})

unique_callers_df <- do_call(rbind, unique_callers_list)

call_rate_by_group <- call_rate_by_group[!call_rate_by_group$group %in% unique_callers_df$group[unique_callers_df$unique.callers == 1], ]


call_rate_indiv <- readRDS("./data/processed/call_rate_by_individual.RDS")

call_rate_indiv <- call_rate_indiv[!call_rate_indiv$group %in% unique_callers_df$group[unique_callers_df$unique.callers == 1], ]


call_rate_by_group <- call_rate_by_group[call_rate_by_group$type != "solo", ]
call_rate_by_group$type <- "overall.group"

call_rate_indiv$type <- as.character(call_rate_indiv$type)
call_rate_indiv$type[call_rate_indiv$type == "group"] <- "indiv.group"

call_rate <- rbind(call_rate_by_group[, intersect(names(call_rate_by_group), names(call_rate_indiv))], call_rate_indiv[, intersect(names(call_rate_by_group), names(call_rate_indiv))])

# aggregate for plot
agg_rate <- aggregate(rate ~ experiment + type + group.size,
                      data = call_rate,
                      FUN = mean)

agg_rate$sd <- aggregate(rate ~ experiment + type + group.size,
                         data = call_rate,
                         FUN = sd)$rate

agg_rate$type <- factor(agg_rate$type,
                        levels = c("solo", "indiv.group", "overall.group"))

Code

call_rate_indiv <- call_rate_indiv[!is.na(call_rate_indiv$rate) &
                                     call_rate_indiv$rate > 0, ]

no_solo <- call_rate_indiv[call_rate_indiv$type != "solo", ]

agg <- aggregate(indiv ~ group + experiment, data = no_solo, function(x)
  length(unique(x)))

agg$group.size <- aggregate(group.size ~ group + experiment, data = no_solo, function(x)
  mean(x))$group.size

agg$prop.callers <- agg$indiv / agg$group.size

In average 0.81 (+/- 0.2 SD) individuals in group flights called

3.1.2.1 Fit regression model

Model: \[ \begin{split} \text{call rate} \sim \text{type} + (1 \mid \text{group}) + \text{monotonic(group size)} + \\ (1 \mid \text{individual}) \end{split} \]

Specifications:

Data: combined natural, artificial and simulated group flight distance data
30 data sets were created with varying simulated group flight trajectories to account for stochasticity in the simulation process
The posterior distributions from the 30 models were combined into a single model fit for statistical inference
The difference in time between when entering the roost between consecutive individuals was used as the response variable (after log transformation, modeled with a normal distribution), while group type (natural, artificial or simulated) was used as predictor
Group ID was included as varying effect
For each data set a model without the group type predictor was also fitted as a null model to further assess model fit

Code

# make group size a factor
call_rate_indiv$group.size.f <- 
  factor(call_rate_indiv$group.size, ordered = TRUE)

# weakly informative priors
priors_calls_indiv <- c(
  # Intercept: baseline log call rate
  prior(normal(0, 1.5), class = "Intercept"),

  # Fixed effects (type + monotonic group size)
  prior(normal(0, 1), class = "b"),

  # Individual-level random intercept SD
  prior(student_t(3, 0, 1), class = "sd"),

  # Negative binomial overdispersion (shape)
  prior(exponential(1), class = "shape")
)


mod <- brm(
          formula = calls | resp_rate(flight.time)  ~ type  + mo(group.size.f) + (1 | indiv),
          iter = iter,
          thin = 1,
          data = call_rate_indiv,
          family = negbinomial(),
          silent = 2,
          chains = chains,
          cores = chains, 
          prior = priors_calls_indiv,
          control = list(adapt_delta = 0.99, max_treedepth = 15),
          file = "./data/processed/individual_call_rate_solo_vs_group_group_size_monot_negbinomial_weak_priors",
          file_refit = "always"
          )

mod <- add_criterion(mod, criterion = "loo")  

# weakly informative priors
priors_calls_indiv_null <- c(
  # Intercept: baseline log call rate
  prior(normal(0, 1.5), class = "Intercept"),

  # Individual-level random intercept SD
  prior(student_t(3, 0, 1), class = "sd"),

  # Negative binomial overdispersion (shape)
  prior(exponential(1), class = "shape")
)      
    null_mod <- brm(
          formula = calls | resp_rate(flight.time)  ~ 1 + (1 | indiv),
          iter = iter,
          thin = 1,
          data = call_rate_indiv,
          family = negbinomial(),
          silent = 2,
          chains = chains,
          cores = chains, 
          prior = priors_calls_indiv_null,
          control = list(adapt_delta = 0.99, max_treedepth = 15),
          file = "./data/processed/null_model_individual_call_rate_solo_vs_group_group_size_monot_negbinomial_weak_priors",
          file_refit = "always"
          )
  
null_mod <- add_criterion(null_mod, criterion = "loo")

3.1.2.1.1 Results

3.1.2.1.1.1 Model performance vs null model

Code

mod <- readRDS("./data/processed/individual_call_rate_solo_vs_group_group_size_monot_negbinomial_weak_priors.rds")
null_mod <- readRDS("./data/processed/null_model_individual_call_rate_solo_vs_group_group_size_monot_negbinomial_weak_priors.rds")

loo_diff <- loo::loo_compare(mod, null_mod)

rows <- rownames(loo_diff)
loo_diff <- as.data.frame(loo_diff)

loo_diff$model <- rows

agg_loo <- aggregate(cbind(elpd_diff, se_diff) ~ model, loo_diff, mean)

agg_loo$model <- ifelse(grepl("null", agg_loo$model), "Null model", "Full model")

agg_loo

model	elpd_diff	se_diff
Full model	0.00000	0.00000
Null model	-81.77298	12.47872

3.1.2.1.1.2 Model fit

Code

extended_summary(
  fit = mod,
  gsub.pattern = "type",
  gsub.replacement = "solo_vs_",
  remove.intercepts = TRUE,
  highlight = TRUE,
  print.name = FALSE,
  trace.palette = viridis::mako, 
  fill = fill_color,
  beta.prefix = c("^b_", "^bsp_mo")
)

	priors	formula	iterations	chains	thinning	warmup	diverg_transitions	rhats > 1.05	min_bulk_ESS	min_tail_ESS	seed
1	b-normal(0, 1) Intercept-normal(0, 1.5) sd-student_t(3, 0, 1) shape-exponential(1) simo-dirichlet(1)	calls \| resp_rate(flight.time) ~ type + mo(group.size.f) + (1 \| indiv)	10000	4	1	5000	0 (0%)	0	7492.782	10895.44	1235233992

	Estimate	l-95% CI	u-95% CI	Rhat	Bulk_ESS	Tail_ESS
b_solo_vs_natural.group	-1.269	-1.487	-1.050	1	22738.261	15387.62
b_solo_vs_artificial.group	-1.444	-1.684	-1.201	1	17500.803	16292.59
bsp_mogroup.size.f	-0.111	-0.230	-0.007	1.001	7492.782	10895.44

3.1.2.1.1.3 Contrasts

Raw estimates:

Code

# contrasts
contrasts <-contrasts(
  fit = mod,
  predictor = "type",
  n.posterior = 2000,
  level.sep = " VS ",
  html.table = FALSE,
  plot = TRUE,
  highlight = TRUE,
  fill = fill_color
)

Code

coefs <- brmsish:::html_format_coef_table(contrasts, highlight = TRUE, fill = fill_color)

coefs

	Contrasts	Estimate	Est.Error	l-95% CI	u-95% CI
1	natural.group VS solo	-1.269	0.112	-1.487	-1.050
2	natural.group VS artificial.group	0.175	0.147	-0.112	0.464
3	solo VS artificial.group	1.444	0.123	1.684	1.201

Converted into mutliplicative differences:

Code

contrasts[3, 2:5] <- contrasts[3, 2:5] * -1
contrasts[3, 1] <- "artificial.group VS solo"

# set effect contrasts to annotate them on the graph
annots <- annots2 <- as.data.frame((contrasts))

# convert values into percentage of more calls
annots$Estimate <- 1 / (exp(annots$Estimate)) 
annots$l <- annots$`l-95% CI`
annots$`l-95% CI` <- 1 / (exp(annots$`u-95% CI`))
annots$`u-95% CI` <- 1 / (exp(annots$l))

annots$l <- NULL
rownames(annots) <- annots$Contrasts
annots$Contrasts <- NULL

annots

	Estimate	Est.Error	l-95% CI	u-95% CI
natural.group VS solo	3.5574261	0.1116194	2.8566783	4.423639
natural.group VS artificial.group	0.8397919	0.1465782	0.6289006	1.118838
artificial.group VS solo	4.2360803	-0.1226664	3.3241583	5.385989

3.1.2.1.2 Plot raw data

All group sizes combined:

Code

call_rate_indiv <- call_rate_indiv[!is.na(call_rate_indiv$rate) &
                                     call_rate_indiv$rate > 0, ]

if (!is.factor(call_rate_indiv$type)) {
  call_rate_indiv$type <- ifelse(
    call_rate_indiv$type == "solo",
    "solo",
    paste(call_rate_indiv$experiment, call_rate_indiv$type, sep = "-")
  )
  
  call_rate_indiv$type <- gsub("mixed-indiv.group",
                               "artificial.group",
                               call_rate_indiv$type)
  call_rate_indiv$type <- gsub("regular-indiv.group",
                               "natural.group",
                               call_rate_indiv$type)
  
  call_rate_indiv$type <- factor(call_rate_indiv$type,
                                 levels = c("solo", "natural.group", "artificial.group"))
}

# table(call_rate_by_group$group.size, call_rate_by_group$type)
cols <- graph_cols[1:3]
cols_transp <- graph_cols_transp[1:3]

names(cols) <- names(cols_transp) <- c("solo", "natural.group", "artificial.group")


# raincloud plot:
gg_rate_indiv <- ggplot(call_rate_indiv, aes(
  y = rate,
  x = type,
  color = type,
  fill = type
)) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15, # remove outliers
    outlier.shape = NA) +
  # add justified jitter from the {gghalves} package
  geom_half_point(
    # color = fill_color,
    # draw jitter on the left
    side = "l",
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  scale_color_manual(values = cols) +
  scale_fill_manual(values = cols_transp) +
  ylim(c(-0.1, 40)) +
  theme(legend.position = "none") +
  labs(x = "Type", y = "Calling rate (calls / min)") +
  scale_x_discrete(
    labels = c(
      "solo" = "Solo",
      "natural.group" = "Natural group",
      "artificial.group" = "Artificial group"
    )
  )

n_grps <-
  aggregate(group ~ type, call_rate_indiv, function(x)
    length(unique(x)))

n_grps$indivs <- sapply(n_grps$type, function(x)
  length(unique(call_rate_indiv$indiv[call_rate_indiv$type == x])))

n_grps$group[1] <- length(unique(call_rate_indiv$group[call_rate_indiv$type == "solo" &
                                                         call_rate_indiv$experiment == "regular"]))

n_grps$indivs[1] <- length(unique(call_rate_indiv$indiv[call_rate_indiv$type == "solo" &
                                                          call_rate_indiv$experiment == "regular"]))

n_grps$distance <- -2.5

n_grps$n.labels <-
  paste0(n_grps$group, " / ", n_grps$indivs)
n_grps$n.labels <- c(
  "n = 81 individuals\nfrom 23 groups",
  "63 individuals\n21 groups",
  "44 individuals\n21 groups"
)

agg_rate <- aggregate(rate ~ type + group, data = call_rate_indiv, FUN = mean)

# keep only those solo from natural groups
solo_keep <- agg_rate[agg_rate$type == "natural.group", ]

solo_keep <- solo_keep$group[solo_keep$group %in% agg_rate$group[agg_rate$type == "solo"]]

agg_rate <- agg_rate[(agg_rate$group %in% solo_keep &
                        agg_rate$type == "solo") | agg_rate$type != "solo", ]

# annotate effect size
annots$annotation <- paste0(
  "\u03B2: ",
  round(annots$Estimate, 1),
  "x ~ (",
  round(ifelse(annots$`l-95% CI` < annots$`u-95% CI`, annots$`l-95% CI`, annots$`u-95% CI`), 1),
  " – ",
  round(ifelse(annots$`u-95% CI` > annots$`l-95% CI`, annots$`u-95% CI`, annots$`l-95% CI`), 1),
  "x)"
)

gg_rate_indiv_pimp <- gg_rate_indiv +
  ylim(c(-3, 35)) +
  geom_text(
    lineheight = 0.8,
    data = n_grps,
    aes(y = distance, x = type, label = n.labels),
    color = "gray10",
    nudge_x = 0,
    size = 3
  ) +
  geom_signif(
    y_position = c(34, 30),
    xmin = c(1, 1),
    xmax = c(2, 3),
    annotation = annots[c("natural.group VS solo", "artificial.group VS solo"), "annotation"],
    parse = FALSE,
    tip_length = 0.02,
    textsize = 3,
    size = 0.5,
    color = "gray10",
    vjust = 0.2
  ) +
  geom_boxplot(
    fill = adjustcolor("orange", alpha.f = 0.1),
    data = agg_rate,
    aes(y = rate, x = type),
    color = adjustcolor("orange", alpha.f = 0.6),
    width = .1,
    # remove outliers
    outlier.shape = NA
  ) +
  geom_half_point(
    data = agg_rate,
    aes(y = rate, x = type),
    color = "orange",
    # draw jitter on the left
    side = "l",
    pch = 20,
    size = 4,
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  scale_x_discrete(
    labels = c(
      "solo" = "Single individual",
      "natural.group" = "Individual in\n natural group",
      "artificial.group" = "Individual in\nartificial group"
    )
  ) +
  theme(axis.title.x = element_blank())

gg_rate_indiv_pimp

3.1.2.1.2.1 Posterior predictive checks

Code

custom_ppc(fit = mod, group = "type")

Takeaways

Individuals in natural and artificial groups produce lower call rates than in solo flights
No difference in call rate between natural and artificial groups
No effect of group size

3.2 Call overlap

3.2.1 Prepare data

Code

total_count_df <- readRDS("./data/processed/total_ovlp_count.RDS")

# convert to overlaps per minute
total_count_df$ovlp.rate <- total_count_df$ovlp.rate * 60

Code

time_calls_indiv <- readRDS("./data/processed/time_position_calls_by_individual.RDS")  

# keep only 120 s duration flights
time_calls_indiv <- time_calls_indiv[time_calls_indiv$flight.time == 120, ]
time_calls_indiv <- time_calls_indiv[complete.cases(time_calls_indiv), ]

time_calls_indiv$sound.files <- time_calls_indiv$orig.sound.files
ovlp <- overlapping_sels(time_calls_indiv, parallel = 10)

# remove overlapping sounds
time_calls_indiv <- ovlp[is.na(ovlp$ovlp.sels), ]


sim_group_call_list <-
  lapply(1:1000, function(x) sim_group_call(c(group_sizes, rep(7, 6)), seed = x))

# add simulation tag
sim_group_call_list <- lapply(seq_along(sim_group_call_list), function(x) {
  Y <- sim_group_call_list[[x]]
  Y$sim <- x
  Y$sound.files <- paste0(Y$sound.files, "-sim", x)
  Y$selec <- seq_len(nrow(Y))
  
return(Y)  
})

sim_group_call_df <- do_call(rbind, sim_group_call_list)

ovlps <- overlapping_sels(sim_group_call_df, parallel = 10)

ovlp_count_list <- lapply(unique(ovlps$sound.files), function(x){
  X <- ovlps[ovlps$sound.files == x, ]
  cnt <- sum(!is.na(X$ovlp.sels))
  out <- data.frame(sound.files = x, group.size = X$group.size[1], ovlp.count = cnt)
  return(out)
})


ovlp_count_df <- do.call(rbind, ovlp_count_list)
  
ovlp_count_df$ovlp.count <- ovlp_count_df$ovlp.count / 2

ovlp_count_df$type <- "Simulated" 

ovlp_count_df$flight.time <- 120
ovlp_count_df$ovlp.rate <- ovlp_count_df$ovlp.count / ovlp_count_df$fligh.time

saveRDS(ovlp_count_df, "./data/processed/ovlps_simulated_group_call.RDS")

Code

sim_ovlp_count_df <- readRDS("./data/processed/ovlps_simulated_group_call.RDS")

time_calls_groups <- readRDS("./data/processed/time_position_calls_for_group_flights.RDS")  

# remove those in which only one individual called
call_id_seq <- read.csv("./data/processed/group_call_individual_id_sequence.csv")

# # keep only 120 s duration flights
# call_id_seq <- call_id_seq[call_id_seq$flight.time == 120,]

call_id_seq <- call_id_seq[order(call_id_seq$event, call_id_seq$start), ]

call_id_seq_list <- split(call_id_seq, call_id_seq$event)

unique_callers_list <- pblapply(call_id_seq_list, function(x){
  x <- x[order(x$start), ]
  id_seq <- x$indiv
  id_seq <- as.character(as.numeric(factor(id_seq)))
  unique.callers <- length(unique(id_seq))

  out <- data.frame(group = x$group[1], unique.callers = unique.callers)
  return(out)
 }
)

unique_callers_df <- do_call(rbind, unique_callers_list)

time_calls_groups <- call_id_seq[!call_id_seq$group %in% unique_callers_df$group[unique_callers_df$unique.callers == 1], ]


time_calls_groups$sound.files <- time_calls_groups$event

obs_ovlps <- overlapping_sels(time_calls_groups, parallel = 10)


obs_ovlp_count_list <- lapply(unique(obs_ovlps$sound.files), function(x){
  X <- obs_ovlps[obs_ovlps$sound.files == x, ]
  cnt <- sum(!is.na(X$ovlp.sels))
  out <- data.frame(sound.files = x, group.size = X$group.size[1], type = X$type[1], ovlp.count = cnt, flight.time = X$flight.time[1])
  return(out)
})


obs_ovlp_count_df <- do.call(rbind, obs_ovlp_count_list)
  
obs_ovlp_count_df$ovlp.count <- obs_ovlp_count_df$ovlp.count / 2

obs_ovlp_count_df$ovlp.rate <- obs_ovlp_count_df$ovlp.count / obs_ovlp_count_df$flight.time



sim_ovlp_count_df$simulation <- NULL
total_count_df <- rbind(sim_ovlp_count_df, obs_ovlp_count_df)


total_count_df$type <- gsub("Regular", "Natural", total_count_df$type)

total_count_df$type <- factor(total_count_df$type, levels = c("Simulated", "Natural", "Artificial"))

saveRDS(total_count_df, "./data/processed/total_ovlp_count.RDS")

Code

sim_ovlp_count_df <- readRDS("./data/processed/ovlps_simulated_group_call.RDS")

# observed data
obs_ovlp_count_df <- total_count_df[total_count_df$type != "Simulated", ]


sim_ovlp_count_df$simulation <- sapply(strsplit(sim_ovlp_count_df$sound.files, "-"), "[", 3)

# split by simulation
sim_ovlp_count_list <- split(sim_ovlp_count_df, sim_ovlp_count_df$simulation)

set.seed(123)
sim_ovlp_count_list <- sample(sim_ovlp_count_list, 30)

#combined each simulated data with the observed data
sim_call_groups_list <- lapply(sim_ovlp_count_list, function(x){
    x$simulation <- NULL
  out <- rbind(obs_ovlp_count_df, x)
  out$ovlp.count <- out$ovlp.count * 2
  out$group.size.sc <- scale(out$group.size, center = TRUE, scale = FALSE) 
  out$group.size.f <- factor(out$group.size, ordered = TRUE) 
  out$type <- gsub("Regular", "Natural", out$type)
  out$type <- factor(out$type, levels = c("Simulated", "Natural", "Artificial"))
  return(out)
})

# weakly informative priors
priors_call_overlap <- c(
  prior(normal(0, 1), class = "Intercept")
)



call_overlap_mod <- brm_multiple(
  formula = ovlp.count ~ type + mo(group.size.f) + offset(log(flight.time)),
  iter = iter,
  thin = 1,
  data = sim_call_groups_list,
  family = poisson(link = "log"),
  silent = 2,
  chains = chains, 
  prior = priors_call_overlap,
  threads = threading(2),
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)

custom_ppc(fit = call_overlap_mod[[1]], group = "type")

call_overlap_mod <- pblapply(call_overlap_mod,  function(x) add_criterion(x, criterion = c("loo")))

saveRDS(call_overlap_mod,
        "./data/processed/simulated_call_overlap_model_weak_priors.RDS")


null_call_overlap_mod <- brm_multiple(
  formula = ovlp.count ~ 1 + mo(group.size.f) + offset(log(flight.time)),
  iter = iter,
  thin = 1,
  data = sim_call_groups_list,
  family = poisson(link = "log"),
  silent = 2,
  chains = chains,
  prior = priors_call_overlap,
  threads = threading(2),
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)


null_call_overlap_mod <- pblapply(null_call_overlap_mod,  function(x) {
  out <- try(add_criterion(x, criterion = c("loo")), silent = TRUE)
  if (is(out, "try-error")) return(x)
  else return(out)
  })

saveRDS(null_call_overlap_mod,
        "./data/processed/simulated_call_overlap_null_model_weak_priors.RDS")

3.2.2 Results

3.2.2.1 Model performance vs null model

Code

call_overlap_mod <- readRDS("./data/processed/simulated_call_overlap_model_weak_priors.RDS") 
null_call_overlap_mod <- readRDS("./data/processed/simulated_call_overlap_null_model_weak_priors.RDS")


loo_diffs <- 
lapply(seq_along(call_overlap_mod), function(x)
  loo::loo_compare(call_overlap_mod[[x]], null_call_overlap_mod[[x]])
  )

loo_diff <- do.call(rbind, loo_diffs)

rows <- rownames(loo_diff)
loo_diff <- as.data.frame(loo_diff)

loo_diff$model <- rows

agg_loo <- aggregate(cbind(elpd_diff, se_diff) ~ model, loo_diff, mean)

agg_loo$model <- ifelse(grepl("null", agg_loo$model), "Null model", "Full model")

agg_loo

model	elpd_diff	se_diff
Full model	0.0000	0.00000
Null model	-211.0154	54.21822

3.2.2.2 Model fit

Code

# average model
avrg_call <-
  paste0("posterior_average(",
         paste0(paste0("call_overlap_mod[[", 1:length(call_overlap_mod), "]]"), collapse = ", "),
         ", weights = 'loo')")

average_model_draws <- eval(parse(text = avrg_call))

brmsish:::draw_extended_summary(average_model_draws,
                      highlight = TRUE,
                      fill = fill_color,
                      beta.prefix = c("^b_", "^bsp_mo"))

	Estimate	l-95% CI	u-95% CI	Rhat	Bulk_ESS	Tail_ESS
b_Intercept	-4.079	-4.663	-3.610	1	7175.955	9629.176
b_typeNatural	-2.336	-2.725	-1.983	1	16074.772	13100.865
b_typeArtificial	-4.093	-5.036	-3.342	1	15179.895	12029.328
bsp_mogroup.size.f	0.446	0.345	0.568	1	7563.200	10128.990

3.2.2.3 Contrasts

Raw estimates:

Code

contrasts <-
  draws_contrasts(
    average_model_draws,
    predictor_name = "type",
    basal_level = "Simulated",
    fill_color = fill_color
  )

contrasts$contrasts_html

	est	l-95% CI	u-95% CI
Natural - Simulated	2.340	1.983	2.725
Artificial - Simulated	4.117	3.342	5.036
Artificial - Natural	1.777	0.929	2.736

Code

contrasts$plot + theme_classic()

Converted into times of overlaps:

Code

# set effect contrasts to annotate them on the graph
annots <- as.data.frame((contrasts$contrasts_df))

# convert values into percentage of more calls
annots$est <- round(exp(annots$est), 2)
annots$l <- annots$`l-95% CI`
annots$`l-95% CI` <- round(exp(annots$`u-95% CI`), 2)
annots$`u-95% CI` <- round(exp(annots$l), 2)

annots$l <- NULL
annots

	est	l-95% CI	u-95% CI
Natural - Simulated	10.38	15.25	7.26
Artificial - Simulated	61.40	153.87	28.28
Artificial - Natural	5.91	15.42	2.53

Simulated produce 10.38 times more overlaps than natural groups
Simulated produce 61.4 times more overlaps than artificial groups
Artificial produce 5.91 times more overlaps than natural groups

Plot raw data:

Code

agg_cnt <- aggregate(ovlp.rate ~ group.size + type, data = total_count_df, FUN = mean)
agg_cnt$sd <- aggregate(ovlp.rate ~ group.size + type, data = total_count_df, FUN = sd)$ovlp.rate

cols <- graph_cols[2:4]

names(cols) <- paste(c("Natural", "Artificial", "Simulated"), "group")

agg_cnt$type <- paste(agg_cnt$type, "group")

agg_cnt$type <- factor(agg_cnt$type, levels = c("Natural group", "Artificial group", "Simulated group"))


# convert previous graph in a point sd graph with number of overlaps in y and group size as factor in x
gg_overlap <- ggplot(agg_cnt, aes(x = factor(group.size), y = ovlp.rate, color = type)) +
  geom_point(position = position_dodge(width = 0.5), alpha = 0.5, size = 3) +
  geom_errorbar(aes(ymin = ovlp.rate - sd, ymax = ovlp.rate + sd), size = 1, width = 0, position = position_dodge(width = 0.5), show.legend = FALSE,  alpha = 0.5) +
        scale_color_manual(values = cols) +
  labs(x = "Group size", y = "Overlaps / min", color = "") +
  theme(legend.position = "inside", legend.position.inside = c(0.2, 0.7)) 

gg_overlap

3.2.2.4 Posterior predictive checks

Code

custom_ppc(fit = call_overlap_mod[[1]], group = "type")

Takeaways

Simulated groups produce more overlapping calls than both artificial and natural groups
Artificial groups produce more overlapping calls than natural groups

3.3 Proportion of calling individuals

Prepare data:

Code

call_id_seq <- read.csv("./data/processed/group_call_individual_id_sequence.csv")

call_id_seq <- call_id_seq[order(call_id_seq$event, call_id_seq$start), ]

call_id_seq_list <- split(call_id_seq, call_id_seq$event)

# get proportion of callers
prop_callers_data_list <- pblapply(call_id_seq_list, function(x){
  x <- x[order(x$start), ]
  id_seq <- x$indiv
  id_seq <- as.character(as.numeric(factor(id_seq)))
  unique.callers <- length(unique(id_seq))
  id_seq <- paste(id_seq, collapse = "")

  out <- data.frame(
    event = x$event[1],
    group = x$group[1],
    group.size = x$group.size[1],
    experiment = x$experiment[1],
    sequence = id_seq,
    unique.callers = unique.callers,
    prop.callers = unique.callers/ x$group.size[1]
  )
  
  return(out)
  })

prop_callers_data <- do.call(rbind, prop_callers_data_list)
prop_callers_data$prop.callers[prop_callers_data$prop.callers >= 1] <- 0.99999


prop_callers_data$type <- ifelse(prop_callers_data$experiment == "Regular", "Natural", "Artificial")

prop_callers_data$group.size.f <- factor(prop_callers_data$group.size, levels = unique(prop_callers_data$group.size), ordered = TRUE)

3.3.1 Fit regression model

Model:

\[ \begin{split} \text{prop. calling individuals} \sim \text{type} + \\ \text{monotonic(group size)} \end{split} \]

Specifications:

The proportion of individuals that called in a group was used as the response variable (modeled with a zero-inflated beta distribution and a probit function), while group type (natural, artificial or simulated) was used as predictor
Group ID was included as varying effect
A model without the group type predictor was also fitted as a null model to further assess model fit

Code

# weakly informative priors
priors_prop <- c(
  # Mean (mu) model
  prior(normal(0, 1.5), class = "Intercept"),
  prior(normal(0, 1), class = "b"),

  # Zero–one inflation (logit scale)
  prior(normal(0, 1.5), class = "zoi"),
  prior(normal(0, 1.5), class = "coi"),

  # Precision (phi > 0)
  prior(exponential(1), class = "phi")
)

mod <- brm(
          formula = prop.callers ~ type + mo(group.size.f),
          iter = iter,
          thin = 1,
          data = prop_callers_data,
          family = zero_one_inflated_beta(link = "probit"),
          silent = 2,
          chains = chains,
          cores = chains, 
           prior = priors_prop,
          control = list(adapt_delta = 0.99,
                 max_treedepth = 15),
          file_refit = "always",
          file = "./data/processed/unique.callers_by_experiment_type_group_size_monot_weak_priors"
 )
 

custom_ppc(mod, group = "type")
  
mod <- add_criterion(mod, criterion = "loo")  

# weakly informative priors
priors_prop_null <- c(
  # Mean (mu) model
  prior(normal(0, 1.5), class = "Intercept"),
  
  # Zero–one inflation (logit scale)
  prior(normal(0, 1.5), class = "zoi"),
  prior(normal(0, 1.5), class = "coi"),

  # Precision (phi > 0)
  prior(exponential(1), class = "phi")
)

null_mod <- brm(
          formula = prop.callers ~ 1 + mo(group.size.f),
          iter = iter,
          thin = 1,
          data = prop_callers_data,
          family = zero_one_inflated_beta(link = "probit"),
          silent = 2,
          chains = chains,
          cores = chains, 
          prior = priors_prop_null,
          control = list(adapt_delta = 0.99,
                 max_treedepth = 15),
          file_refit = "always",
          file = "./data/processed/null_model_unique.callers_by_experiment_type_group_size_monot_weak_priors"
 )

null_mod <- add_criterion(null_mod, criterion = "loo")

3.3.1.1 Results

3.3.1.1.1 Model performance vs null model

Code

mod <- readRDS("./data/processed/unique.callers_by_experiment_type_group_size_monot_weak_priors.rds")
null_mod <- readRDS("./data/processed/null_model_unique.callers_by_experiment_type_group_size_monot_weak_priors.rds")

loo_diff <- loo_compare(mod, null_mod)


rows <- rownames(loo_diff)
loo_diff <- as.data.frame(loo_diff)

loo_diff$model <- rows

agg_loo <- aggregate(cbind(elpd_diff, se_diff) ~ model, loo_diff, mean)

agg_loo$model <- ifelse(grepl("null", agg_loo$model), "Null model", "Full model")

agg_loo

model	elpd_diff	se_diff
Full model	-0.5587733	0.5378702
Null model	0.0000000	0.0000000

3.3.1.1.2 Model fit

Code

extended_summary(
  fit = mod,
  gsub.pattern = "b_type",
  gsub.replacement = "Aritificial_vs_",
  remove.intercepts = TRUE,
  highlight = TRUE,
  print.name = FALSE,
  trace.palette = viridis::mako, 
  fill = fill_color, 
  beta.prefix = "^b_|^bsp_"
)

	priors	formula	iterations	chains	thinning	warmup	diverg_transitions	rhats > 1.05	min_bulk_ESS	min_tail_ESS	seed
1	b-normal(0, 1) coi-normal(0, 1.5) Intercept-normal(0, 1.5) phi-exponential(1) simo-dirichlet(1) zoi-normal(0, 1.5)	prop.callers ~ type + mo(group.size.f)	10000	4	1	5000	0 (0%)	0	13244.31	12099.43	1547601009

	Estimate	l-95% CI	u-95% CI	Rhat	Bulk_ESS	Tail_ESS
Aritificial_vs_Natural	-0.100	-0.389	0.194	1	19450.42	13765.22
bsp_mogroup.size.f	-0.008	-0.127	0.123	1	13244.31	12099.43

3.3.1.1.3 Plot raw data

Code

# table(call_rate_by_group$group.size, call_rate_by_group$type)
cols <- graph_cols[2:3]
cols_transp <- graph_cols_transp[2:3]

names(cols) <- names(cols_transp) <- c("Natural", "Artificial")

prop_callers_data$type <- factor(prop_callers_data$type, levels = c("Natural", "Artificial"))

n_grps <-
  aggregate(group ~ type, prop_callers_data, function(x)
    length(unique(x)))

n_grps$indivs <- sapply(n_grps$type, function(x)
  sum(prop_callers_data$group.size[prop_callers_data$type == x]))

n_grps$n.labels <-
  paste0(n_grps$group, " / ", n_grps$indivs)
n_grps$n.labels <- c(
  "n = 26 groups",
  "31 groups"
  )

n_grps$prop.callers <- 0.37

# raincloud plot:
gg_prop_indiv <-
  ggplot(prop_callers_data, aes(
  y = prop.callers,
  x = type,
  color = type,
  fill = type
)) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
    ylim(c(0.36, 1)) +
  geom_text(
    lineheight = 0.8,
    data = n_grps,
    aes(y = prop.callers, x = type, label = n.labels),
    color = "gray10",
    nudge_x = 0,
    size = 3
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15, # remove outliers
    outlier.shape = NA) +
  # add justified jitter from the {gghalves} package
  geom_half_point(
    # color = fill_color,
    # draw jitter on the left
    side = "l",
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  scale_color_manual(values = cols) +
  scale_fill_manual(values = cols_transp) +
  # ylim(c(-0.1, 40)) +
  theme(legend.position = "none",  axis.title.x = element_blank()) +
  labs(x = "Type", y = "Proportion of\ncalling individuals")

gg_prop_indiv

3.3.1.1.4 Posterior predictive checks

Code

custom_ppc(fit = mod, group = "type")

Takeaways

Similar proportion of calling individuals in natural and artificial groups

3.4 Combine vocal coordination plots

Code

  p_row1 <- plot_grid(gg_overlap + theme_classic(base_size = 12)  + theme(
      legend.position = "inside",
      legend.position.inside = c(0.33, 0.87)
    ),
    gg_prop_indiv + theme_classic(base_size = 12)  + theme(
      legend.position = "none"),
     labels = "AUTO")

pg <-
  plot_grid(
   p_row1,
   gg_rate_indiv_pimp + theme_classic(base_size = 12) + theme(legend.position = "none", axis.title.x = element_blank()) + labs(y = "Calling rate\n(calls / min)"),
    ggrate_indv_grp_pimp + theme_classic(base_size = 12) + theme(legend.position = "none", axis.title.x = element_blank()) + labs(y = "Calling rate\n(calls / min)"),
    ncol = 1,
    labels = c(" ", "C", "D")
  )


hedFont <- "Arial"
pg <- pg +
  theme(plot.title = element_text(
    size = 20,
    family = hedFont,
    face = "bold"
  ))

pg

Code

if (!isTRUE(getOption('knitr.in.progress'))){
ggsave(
  "./output/vocal_coordination_4_panels.png",
  pg,
  grDevices::png,
  width = 6,
  height = 8,
  dpi = 300
)
}

Session information

─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.5.2 (2025-10-31)
 os       Ubuntu 22.04.4 LTS
 system   x86_64, linux-gnu
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Costa_Rica
 date     2026-03-03
 pandoc   3.6.3 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
 quarto   1.8.25 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/quarto

─ Packages ───────────────────────────────────────────────────────────────────
 package        * version   date (UTC) lib source
 abind            1.4-5     2016-07-21 [3] CRAN (R 4.0.1)
 ape              5.8-1     2024-12-16 [1] CRAN (R 4.5.2)
 arrayhelpers     1.1-0     2020-02-04 [1] CRAN (R 4.5.2)
 backports        1.4.1     2021-12-13 [3] CRAN (R 4.1.2)
 bayesplot        1.15.0    2025-12-12 [1] CRAN (R 4.5.2)
 bitops           1.0-7     2021-04-24 [3] CRAN (R 4.1.1)
 bridgesampling   1.2-1     2025-11-19 [1] CRAN (R 4.5.2)
 brio             1.1.5     2024-04-24 [1] CRAN (R 4.5.2)
 brms           * 2.23.0    2025-09-09 [1] CRAN (R 4.5.2)
 brmsish        * 1.0.0     2026-03-03 [1] Github (maRce10/brmsish@81ab826)
 Brobdingnag      1.2-9     2022-10-19 [1] CRAN (R 4.5.2)
 cachem           1.1.0     2024-05-16 [1] CRAN (R 4.5.2)
 cellranger       1.1.0     2016-07-27 [1] CRAN (R 4.5.2)
 checkmate        2.0.0     2020-02-06 [3] CRAN (R 4.0.1)
 cli              3.6.5     2025-04-23 [1] CRAN (R 4.5.2)
 coda             0.19-4.1  2024-01-31 [1] CRAN (R 4.5.2)
 codetools        0.2-19    2023-02-01 [4] CRAN (R 4.2.2)
 cowplot        * 1.2.0     2025-07-07 [1] CRAN (R 4.5.2)
 crayon           1.5.3     2024-06-20 [1] CRAN (R 4.5.2)
 curl             7.0.0     2025-08-19 [1] CRAN (R 4.5.2)
 devtools         2.4.6     2025-10-03 [1] CRAN (R 4.5.2)
 digest           0.6.39    2025-11-19 [1] CRAN (R 4.5.2)
 distributional   0.6.0     2026-01-14 [1] CRAN (R 4.5.2)
 dplyr            1.2.0     2026-02-03 [1] CRAN (R 4.5.2)
 dtw              1.23-1    2022-09-19 [1] CRAN (R 4.5.2)
 ellipsis         0.3.2     2021-04-29 [3] CRAN (R 4.1.1)
 evaluate         1.0.5     2025-08-27 [1] CRAN (R 4.5.2)
 extrafont      * 0.20      2025-09-24 [1] CRAN (R 4.5.2)
 extrafontdb      1.1       2025-09-28 [1] CRAN (R 4.5.2)
 farver           2.1.2     2024-05-13 [1] CRAN (R 4.5.2)
 fastmap          1.2.0     2024-05-15 [1] CRAN (R 4.5.2)
 fftw             1.0-9     2024-09-20 [1] CRAN (R 4.5.2)
 fs               1.6.6     2025-04-12 [1] CRAN (R 4.5.2)
 generics         0.1.2     2022-01-31 [3] CRAN (R 4.1.2)
 ggdist         * 3.3.3     2025-04-23 [1] CRAN (R 4.5.2)
 gghalves       * 0.1.4     2026-03-03 [1] Github (erocoar/gghalves@e5c3c79)
 ggplot2        * 4.0.2     2026-02-03 [1] CRAN (R 4.5.2)
 ggridges       * 0.5.7     2025-08-27 [1] CRAN (R 4.5.2)
 ggsignif       * 0.6.4     2022-10-13 [1] CRAN (R 4.5.2)
 glue             1.8.0     2024-09-30 [1] CRAN (R 4.5.2)
 gridExtra        2.3       2017-09-09 [3] CRAN (R 4.0.1)
 gtable           0.3.6     2024-10-25 [1] CRAN (R 4.5.2)
 htmltools        0.5.9     2025-12-04 [1] CRAN (R 4.5.2)
 htmlwidgets      1.5.4     2021-09-08 [3] CRAN (R 4.1.1)
 httr             1.4.2     2020-07-20 [3] CRAN (R 4.0.3)
 inline           0.3.21    2025-01-09 [1] CRAN (R 4.5.2)
 jsonlite         2.0.0     2025-03-27 [1] CRAN (R 4.5.2)
 kableExtra     * 1.4.0     2024-01-24 [1] CRAN (R 4.5.2)
 knitr          * 1.51      2025-12-20 [1] CRAN (R 4.5.2)
 labeling         0.4.3     2023-08-29 [1] CRAN (R 4.5.2)
 lattice          0.22-5    2023-10-24 [4] CRAN (R 4.3.1)
 lifecycle        1.0.5     2026-01-08 [1] CRAN (R 4.5.2)
 loo            * 2.9.0     2025-12-23 [1] CRAN (R 4.5.2)
 magrittr         2.0.4     2025-09-12 [1] CRAN (R 4.5.2)
 MASS             7.3-60    2023-05-04 [4] CRAN (R 4.3.1)
 Matrix           1.6-3     2023-11-14 [4] CRAN (R 4.3.2)
 matrixStats      0.61.0    2021-09-17 [3] CRAN (R 4.1.1)
 memoise          2.0.1     2021-11-26 [3] CRAN (R 4.1.2)
 mvtnorm          1.1-3     2021-10-08 [3] CRAN (R 4.1.1)
 NatureSounds   * 1.0.5     2025-01-17 [1] CRAN (R 4.5.2)
 nlme             3.1-163   2023-08-09 [4] CRAN (R 4.3.1)
 packrat          0.9.3     2025-06-16 [1] CRAN (R 4.5.2)
 pbapply        * 1.7-4     2025-07-20 [1] CRAN (R 4.5.2)
 pillar           1.11.1    2025-09-17 [1] CRAN (R 4.5.2)
 pkgbuild         1.4.8     2025-05-26 [1] CRAN (R 4.5.2)
 pkgconfig        2.0.3     2019-09-22 [3] CRAN (R 4.0.1)
 pkgload          1.5.0     2026-02-03 [1] CRAN (R 4.5.2)
 plyr             1.8.6     2020-03-03 [3] CRAN (R 4.0.1)
 posterior        1.6.1     2025-02-27 [1] CRAN (R 4.5.2)
 proxy            0.4-26    2021-06-07 [3] CRAN (R 4.1.1)
 purrr            1.2.1     2026-01-09 [1] CRAN (R 4.5.2)
 QuickJSR         1.9.0     2026-01-25 [1] CRAN (R 4.5.2)
 R6               2.6.1     2025-02-15 [1] CRAN (R 4.5.2)
 RColorBrewer     1.1-3     2022-04-03 [1] CRAN (R 4.5.2)
 Rcpp           * 1.1.1     2026-01-10 [1] CRAN (R 4.5.2)
 RcppParallel     5.1.11-1  2025-08-27 [1] CRAN (R 4.5.2)
 RCurl            1.98-1.17 2025-03-22 [1] CRAN (R 4.5.2)
 readxl         * 1.4.5     2025-03-07 [1] CRAN (R 4.5.2)
 remotes          2.5.0     2024-03-17 [1] CRAN (R 4.5.2)
 reshape2         1.4.4     2020-04-09 [3] CRAN (R 4.0.1)
 rjson            0.2.23    2024-09-16 [1] CRAN (R 4.5.2)
 rlang            1.1.7     2026-01-09 [1] CRAN (R 4.5.2)
 rmarkdown        2.30      2025-09-28 [1] CRAN (R 4.5.2)
 rstan            2.32.7    2025-03-10 [1] CRAN (R 4.5.2)
 rstantools       2.6.0     2026-01-10 [1] CRAN (R 4.5.2)
 rstudioapi       0.13      2020-11-12 [3] CRAN (R 4.0.3)
 Rttf2pt1         1.3.14    2025-09-26 [1] CRAN (R 4.5.2)
 S7               0.2.1     2025-11-14 [1] CRAN (R 4.5.2)
 scales           1.4.0     2025-04-24 [1] CRAN (R 4.5.2)
 seewave        * 2.2.4     2025-08-19 [1] CRAN (R 4.5.2)
 sessioninfo      1.2.3     2025-02-05 [1] CRAN (R 4.5.2)
 signal           1.8-1     2024-06-26 [1] CRAN (R 4.5.2)
 sketchy          1.0.7     2026-03-03 [1] CRANs (R 4.5.2)
 StanHeaders      2.32.10   2024-07-15 [1] CRAN (R 4.5.2)
 stringi          1.8.7     2025-03-27 [1] CRAN (R 4.5.2)
 stringr          1.6.0     2025-11-04 [1] CRAN (R 4.5.2)
 svglite          2.1.0     2022-02-03 [3] CRAN (R 4.1.2)
 svUnit           1.0.8     2025-08-26 [1] CRAN (R 4.5.2)
 systemfonts      1.3.1     2025-10-01 [1] CRAN (R 4.5.2)
 tensorA          0.36.2.1  2023-12-13 [1] CRAN (R 4.5.2)
 testthat         3.3.2     2026-01-11 [1] CRAN (R 4.5.2)
 tibble           3.3.1     2026-01-11 [1] CRAN (R 4.5.2)
 tidybayes        3.0.7     2024-09-15 [1] CRAN (R 4.5.2)
 tidyr            1.2.0     2022-02-01 [3] CRAN (R 4.1.2)
 tidyselect       1.2.1     2024-03-11 [1] CRAN (R 4.5.2)
 tuneR          * 1.4.7     2024-04-17 [1] CRAN (R 4.5.2)
 usethis          3.2.1     2025-09-06 [1] CRAN (R 4.5.2)
 vctrs            0.7.1     2026-01-23 [1] CRAN (R 4.5.2)
 viridis        * 0.6.2     2021-10-13 [3] CRAN (R 4.1.1)
 viridisLite    * 0.4.3     2026-02-04 [1] CRAN (R 4.5.2)
 warbleR        * 1.1.37    2025-10-22 [1] CRAN (R 4.5.2)
 withr            3.0.2     2024-10-28 [1] CRAN (R 4.5.2)
 xaringanExtra    0.8.0     2024-05-19 [1] CRAN (R 4.5.2)
 xfun             0.56      2026-01-18 [1] CRAN (R 4.5.2)
 xml2             1.5.2     2026-01-17 [1] CRAN (R 4.5.2)
 yaml             2.3.12    2025-12-10 [1] CRAN (R 4.5.2)

 [1] /home/m/R/x86_64-pc-linux-gnu-library/4.5
 [2] /usr/local/lib/R/site-library
 [3] /usr/lib/R/site-library
 [4] /usr/lib/R/library
 * ── Packages attached to the search path.

──────────────────────────────────────────────────────────────────────────────