Statistical analysis

Behavioral coordination in Spix’s disk-winged bats

Author
Published

April 8, 2025

Source code and data found at https://github.com/maRce10/flight_coordination_in_Thyroptera

To do

  • Make sure all estimates are using median and MAP
  • Can we back-transform response variables to use intuitive units in the results???
  • Make sure all models run for the same number of iterations and number of chains

Purpose

  • Generate simulated data
  • Run statistical analyses

Load packages and configure global settings

Code
# knitr is require for creating html/pdf/word reports formatR is
# used for soft-wrapping code

# install/ load packages
sketchy::load_packages(packages = c("viridis", "ggplot2", "knitr",
    "readxl", "pbapply", "brms", "kableExtra", "ggridges", "tidybayes",
    "brmsish", "bayestestR", "cowplot", bioconductor = "multtest",
    "metap", "ggsignif", "extrafont"))
source("~/Dropbox/R_package_testing/brmsish/R/helpers.R")
source("~/Dropbox/R_package_testing/brmsish/R/draw_extended_summary.R")
source("~/Dropbox/R_package_testing/brmsish/R/extended_summary.R")

opts_chunk$set(fig.width = 12, fig.height = 8)

theme_set(theme_classic(base_size = 24))

fill_color <- viridis::mako(10, alpha = 0.5)[7]

graph_cols <- mako(4, begin = 0, end = 0.8)

graph_cols_transp <- mako(4, begin = 0, end = 0.8, alpha = 0.5)

names(graph_cols_transp) <- names(graph_cols) <- c("solo", "natural",
    "artificial", "simulated")

chains <- 4

iter <- 10000

options(brms.backend = "cmdstanr")

Custom functions

Code
# Coefficient of variation
cv <- function(x) sd(x) / mean(x) * 100

# Function to measure the euclidean distance in 3D space between bats in a group flight trajectory
pairwise_dist <- function(X) {
  # keep only 3D coordinates data
  xyz <- X[, grep("^x|^y|^z", names(X))]
  
  # remove empty columns
  xyz <- xyz[, sapply(xyz, function(x)
    sum(is.na(x))) < 10]
  
  # set to long format
  lng_xyz  <-
    data.frame(x = stack(xyz[, grep("^x", names(xyz))])[, 1],
               y = stack(xyz[, grep("^y", names(xyz))])[, 1],
               z = stack(xyz[, grep("^z", names(xyz))]))
  
  names(lng_xyz)[names(lng_xyz) ==  "z.values"] <- "z"
  
  lng_xyz$ind <- gsub("z_", "", lng_xyz$z.ind)
  lng_xyz$z.ind <- NULL
  # lng_xyz$frame <- X$frame
  lng_xyz$frame <- 1:nrow(X)
  
  # mean distance among all bats
  obs_dists <-
    sapply(unique(lng_xyz$frame), function(x)
      mean(dist(lng_xyz[lng_xyz$frame == x, c("x", "y", "z")])))
  
  out <-
    data.frame(
      group = X$group[1],
      group.size = length(unique(lng_xyz$ind)),
      distance = obs_dists,
      type = X$type[1],
      frame = 1:nrow(X)
    )
  
  # add group size as factor
  out$group.size.factor <- factor(out$group.size, levels = 1:10, ordered = TRUE)
  return(out)
}

# function to create simulated data getting one individual trajectory from diferent groups for a total of individual equals to group_sizes[x]
sim_group_traj <- function(group_sizes, seed, trajectories = FALSE) {
  trajs <- indiv_traj_list
  
  # randomize so each iteration is a different
  set.seed(seed)
  trajs <- sample(trajs)
  
  dists_list <- list()
  
  group_num <- 1
  
  used_trajs <- list()
  
  
  for (e in seq_along(group_sizes)) {
    i <- group_sizes[e]
    sample_traj_indx <- which(!duplicated(names(trajs)))[1:i]
    
    set.seed(group_num + seed)
    sample_trajs <- trajs[sample_traj_indx]
    
    # force all to have the same number of frames
    min_frames <- min(sapply(sample_trajs, nrow))
    
    sample_trajs <-
      lapply(sample_trajs, function(x)
        x[1:min_frames, ])
    
    sample_traj_df <- do_call(cbind, sample_trajs)
    
    names(sample_traj_df) <-
      paste(c("x", "y", "z"), rep(1:i, 3), sep = "_")
    
    sample_traj_df$group <- paste("sim", group_num, sep = "-")
    sample_traj_df$type <- "simulated"
    
    dist_df <- pairwise_dist(sample_traj_df)
    dist_df$type <- "simulated"
    
    # add to output list
    dists_list[[length(dists_list) + 1]] <- dist_df
    
    # remove sampled trajectories from trajs based on indices so idnviduals are only used once
    trajs <- trajs[-sample_traj_indx]
    group_num <- group_num + 1
    names(sample_trajs) <- paste("sim", e, 1:length(sample_trajs), sep = "-")
    used_trajs <- c(used_trajs, sample_trajs)
  }
  
  dists_df <- do.call(rbind, dists_list)
  
  if (!trajectories)
    return(dists_df) else
      return(used_trajs)
}

# function to simulate group calling from solo calling vectors
sim_group_call <- function(group_sizes, seed) {
  indv_calls <- split(time_calls_indiv, list(time_calls_indiv$year.audio))

  # randomize so each iteration is a different
  set.seed(seed)
  indv_calls <- sample(indv_calls)
  
  # dists_list <- list()
  
  # group_num <- 1
  
  # used_calls <- list()
  
  
  sim_call_list <- lapply(seq_along(group_sizes), function(i) {
    
    e <- group_sizes[i]
    # set.seed(e + seed)
    sampled_calls <- indv_calls[sample(1:length(indv_calls), e)]

    clms <- c("start", "end", "indiv", "year.audio")
  
    sampled_calls <- lapply(sampled_calls, function(x) x[, clms])
  
    sample_calls_df <- do_call(rbind, sampled_calls)
    
    sample_calls_df$sound.files <- paste(i, e, sep = "-")
    sample_calls_df$year.audio
    
    sample_calls_df$group.size <- e
      
    
    return(sample_calls_df)
  })
  
  sim_call_df <- do.call(rbind, sim_call_list)
  
      return(sim_call_df)
}



# Function to calculate contrasts based on a brms draws object
# Function to calculate contrasts based on posterior draws and plot the posterior distributions
draws_contrasts <-
  function(draws,
           predictor_name,
           basal_level,
           fill_color = "blue",
           beta.prefix = "^b_") {
    # Extract the model coefficients
    coef_names <- colnames(draws)
    intercept_name <-
      grep("^b_Intercept$", coef_names, value = TRUE)
    group_names <-
      grep(paste0(beta.prefix, predictor_name), coef_names, value = TRUE)
    
    # Determine the levels of the categorical predictor
    levels <-
      c(basal_level, sub(paste0(beta.prefix, predictor_name), "", group_names))
    
    # Calculate EMMs for each level
    group_values <- list()
    group_values[[1]] <- draws[[intercept_name]]
    for (i in seq_along(group_names)) {
      group_values[[i + 1]] <-
        draws[[intercept_name]] + draws[[group_names[i]]]
    }
    
    # Compute pairwise contrasts
    contrast_list <- list()
    contrast_names <-
      combn(levels, 2, function(x)
        paste(x[2], "-", x[1]))
    for (i in 1:length(contrast_names)) {
      contrast_list[[i]] <-
        group_values[[which(levels == strsplit(contrast_names[i], " - ")[[1]][2])]] -
        group_values[[which(levels == strsplit(contrast_names[i], " - ")[[1]][1])]]
    }
    names(contrast_list) <- contrast_names
    
    # Function to summarize contrasts
    summarize_contrast <- function(contrast) {
      mean_contrast <- mean(contrast)
      lower_contrast <- quantile(contrast, 0.025)
      upper_contrast <- quantile(contrast, 0.975)
      return(c(
        mean = mean_contrast,
        lower = lower_contrast,
        upper = upper_contrast
      ))
    }
    
    # Summarize each contrast
    summary_contrasts <-
      sapply(contrast_list, summarize_contrast, simplify = "array")
    
    # Convert summary to a data frame for plotting
    contrast_summary_df <- data.frame(
      Contrast = rep(names(contrast_list), each = 3),
      Statistic = rep(c("Mean", "Lower", "Upper"), times = length(contrast_list)),
      Value = as.vector(summary_contrasts)
    )
    
    # flip summary
    summary_contrasts <- t(summary_contrasts)
    colnames(summary_contrasts) <- c("est", "l-95% CI", "u-95% CI")
    summary_contrasts <- html_format_coef_table(as.data.frame(summary_contrasts), fill = fill_color,  highlight = TRUE)

    # Plot the posterior distributions of contrasts
    contrast_df <-
      do.call(rbind, lapply(names(contrast_list), function(name) {
        data.frame(Contrast = name, Value = contrast_list[[name]])
      }))
    
    p <- ggplot(contrast_df, aes(x = Value)) +
      geom_density(alpha = 0.6,
                   fill = fill_color,
                   color = fill_color) +
      geom_vline(xintercept = 0, linetype = "dotted") +
      facet_wrap(~ Contrast, scales = "free_y", ncol = 1) +
      theme_minimal() +
      theme(
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        plot.title = element_blank()
      )
    
    results <- list(plot = p,
                    contrasts = summary_contrasts)
    
    return(results)
  }

# to create several posterior predictive check plots out of a brms fit

custom_ppc <- function(fit, group = NULL, ndraws = 30) {
  by_group  <- if (!is.null(group)){
    TRUE 
    } else
    FALSE
  
  if (by_group)
    by_group  <-  if (any(names(fit$data) == group)){
      TRUE
      } else
    FALSE
  
  if (by_group)
    by_group <-
      if (is.character(fit$data[, group]) |
          is.factor(fit$data[, group])){
        TRUE
        } else
    FALSE
  
  
  if (by_group) {
    ppc_dens <- pp_check(fit,
                         ndraws = ndraws,
                         type = 'dens_overlay_grouped',
                         group = group)
    
    pp_mean <- pp_check(
      fit,
      type = "stat_grouped",
      stat = "mean",
      group = group,
      ndraws = ndraws
    )  + theme_classic()
    
    pp_scat <- pp_check(fit,
                        type = "scatter_avg",
                        # group = group,
                        ndraws = ndraws)
  } else {
    ppc_dens <- pp_check(fit,
                         ndraws = ndraws,
                         type = 'dens_overlay')
    
    pp_mean <- pp_check(fit,
                        type = "stat",
                        stat = "mean",
                        ndraws = ndraws) + theme_classic()
    
    pp_scat <-  pp_check(fit,
                         type = "scatter_avg",
                         ndraws = ndraws)
  }
  
  pp_stat2 <- pp_check(fit, type = "stat_2d", ndraws = ndraws)
  
  pp_plot_list <-
    list(ppc_dens, pp_mean, pp_scat,  pp_stat2)
  
  pp_plot_list[c(1, 3:4)] <-
    lapply(pp_plot_list[c(1, 3:4)], function(x)
      x  + scale_color_viridis_d(
        begin = 0.3,
        end = 0.8,
        alpha = 0.5,
        option = "mako",
      ) + scale_fill_viridis_d(
        begin = 0.3,
        end = 0.8,
        alpha = 0.5,
        option = "mako"
      ) + theme_classic())
  
  
  ppc_plot <- plot_grid(plotlist = pp_plot_list, ncol = 2)
  
  print(ppc_plot)
}

## caller sequence features
# Function to calculate entropy
entropy <- function(sequence) {
  freq <- table(strsplit(sequence, NULL)[[1]])  # Frequency of each letter
  prob <- freq / sum(freq)  # Probability of each letter
  entropy_value <- -sum(prob * log2(prob))  # Shannon entropy
  return(entropy_value)
}

# Function to calculate normalized entropy
normalized_entropy <- function(sequence) {
  H <- entropy(sequence)
  unique_elements <- length(unique(strsplit(sequence, NULL)[[1]]))
  max_entropy <- log2(unique_elements)
  H_norm <- H / max_entropy
  return(H_norm)
}

# Function to calculate entropy of a probability distribution
entropy2 <- function(probs) {
  non_zero_probs <- probs[probs > 0]  # Ignore zero probabilities
  H <- -sum(non_zero_probs * log2(non_zero_probs))
  return(H)
}

# Function to create a Markov model and calculate average entropy
markov_entropy <- function(sequence, normalize = FALSE) {
  states <- strsplit(sequence, NULL)[[1]]
  transitions <- table(head(states, -1), tail(states, -1))  # Transition counts
  transition_probs <- prop.table(transitions, 1)  # Row-wise probabilities
  
  # Calculate entropy for each row (state) in the transition matrix
  state_entropies <- apply(transition_probs, 1, entropy2)
  
  # Average entropy across all states
  avg_entropy <- mean(state_entropies, na.rm = TRUE)

  if (normalize) avg_entropy <- avg_entropy / log2(length(unique(states)))
  
  return(avg_entropy)
}

1 Flight coordination

1.1 Descriptive stats

Read data:

Code
group_traj <- read.csv("./data/raw/flight_trajectories_by_group.csv")

Number of trails: 34

Group sizes:

Code
# get number of individuals per group
n_per_group <- sapply(unique(group_traj$group), USE.NAMES = FALSE,
    function(x) {
        # get those for that group
        X <- group_traj[group_traj$group == x, ]

        # count number of individuals (No NAs)
        n_indiv <- (sum(!sapply(X, anyNA)) - 3)/3

        return(n_indiv)
    })

table(n_per_group)
n_per_group
2 3 4 5 6 
7 8 7 8 4 

1.2 Compute pairwise distances for natural and artificial groups

Code
pairwise_dists_list <- sapply(split(group_traj, group_traj$group),
    pairwise_dist, simplify = FALSE)

pairwise_dists <- do.call(rbind, pairwise_dists_list)

# remove those with unlikely distance
pairwise_dists <- pairwise_dists[pairwise_dists$distance > 0.1 & pairwise_dists$distance <
    7, ]

1.3 Generate simulated group flight trajectories

Code
## split into single individual trajectories split in a list
split_group_traj <- split(group_traj, group_traj$group)

# get single individual trajectores
indiv_traj_list <- lapply(split_group_traj, function(x) {
    x <- x[, sapply(x, function(x) sum(is.na(x))) < 10]

    # get number of individuals
    suppressWarnings(indv_num <- unique(na.omit(as.numeric(gsub(".*?([0-9]+).*",
        "\\1", names(x))))))

    indiv_trajs <- lapply(indv_num, function(y) x[, grep(y, names(x))])

    return(indiv_trajs)
})

indiv_traj_list <- unlist(indiv_traj_list, recursive = FALSE, use.names = TRUE)

# remove last character of the list names so all have the
# original group name
names(indiv_traj_list) <- gsub(".$", "", names(indiv_traj_list))

# mean number of individuals per group size in original data
mean_n_indiv <- floor(mean(table(n_per_group)))

group_sizes <- as.numeric(names(table(n_per_group)))

group_sizes <- rep(group_sizes, each = mean_n_indiv)

sim_dists <- lapply(1:30, function(x) sim_group_traj(group_sizes,
    seed = x))

sim_trajs_list <- lapply(1:30, function(x) sim_group_traj(group_sizes,
    seed = x, trajectories = TRUE))

# remove those with unlikely distance
sim_dists <- lapply(sim_dists, function(x) {
    x <- x[x$distance > 0.1 & x$distance < 7.81, ]
    return(x)
})

1.4 Describe data

Mean distance between individuals in a group flight:

Code
sim_dists_df <- do.call(rbind, sim_dists)
# mean distance by group type
agg <- aggregate(distance ~ type, sim_dists_df, mean)
agg$sd <- aggregate(distance ~ type, sim_dists_df, sd)$distance

# mean distance by group type
agg2 <- aggregate(distance ~ type, pairwise_dists, mean)
agg2$sd <- aggregate(distance ~ type, pairwise_dists, sd)$distance

rbind(agg, agg2)
type distance sd
simulated 2.050082 0.5696759
artificial 2.005765 0.6803455
natural 1.670052 0.7256127

Closest distance between individuals in a group flight:

Code
sim_dists_df <- do.call(rbind, sim_dists)

# mean distance by group type
agg_min <- aggregate(distance ~ type + group, sim_dists_df, min)

agg <- aggregate(distance ~ type, agg_min, mean)
agg$sd <- aggregate(distance ~ type, agg_min, sd)$distance

# mean distance by group type
agg_min2 <- aggregate(distance ~ type + group, pairwise_dists, min)
agg2 <- aggregate(distance ~ type, agg_min2, mean)
agg2$sd <- aggregate(distance ~ type, agg_min2, sd)$distance

rbind(agg, agg2)
type distance sd
simulated 0.6592624 0.3312211
artificial 0.9213170 0.5873041
natural 0.7190676 0.3595199

1.5 Plot raw data

1.5.1 Reduce dimensionality

1.6 Generate simulated group flight trajectories

Code
## split into single individual trajectories
# split in a list
split_group_traj <- split(group_traj, group_traj$group)

# pca fun
fun1 <- function(x) prcomp(x, center = FALSE, scale. = FALSE)$x[,1]

# horizontal plane fun
# fun1 <- function(x) x[,3]

indiv_traj_pc1 <- lapply(indiv_traj_list, fun1)



pca_list <- lapply(unique(names(indiv_traj_pc1)), function(x){
  
  pca_df <- data.frame(indiv_traj_pc1[names(indiv_traj_pc1) == x])
  names(pca_df) <- paste0("ind", 1:ncol(pca_df))
  
  stacked_pca_df <- stack(pca_df)
  stacked_pca_df$frame <- 1:nrow(pca_df)
  stacked_pca_df$group <- x 
  stacked_pca_df$group.size <- ncol(pca_df)
  return(stacked_pca_df)
})

pca_df <- do.call(rbind, pca_list)


# for simulated data
sim_traj_pc1 <- lapply(sim_trajs_list[[1]], function(x) x[,3])

# pca_sim_list <- lapply(unique, function(x){
pca_sim_list <- lapply(paste("sim", 1:30, "", sep = "-"), function(x){
  
  pca_df <- data.frame(sim_traj_pc1[grep(pattern = x, names(sim_traj_pc1))])
  names(pca_df) <- paste0("ind", 1:ncol(pca_df))
  
  stacked_pca_df <- stack(pca_df)
  stacked_pca_df$frame <- 1:nrow(pca_df)
  stacked_pca_df$group <- x 
  stacked_pca_df$group.size <- ncol(pca_df)
  return(stacked_pca_df)
})


pca_sim_df <- do.call(rbind, pca_sim_list)
pca_sim_df$type <- "simulated"

pca_df$type <- sapply(pca_df$group, function(x) pairwise_dists$type[group_traj$group == x][1])


#bind
pca_df <- rbind(pca_df, pca_sim_df)


# order groups by type
pca_df$group <- factor(pca_df$group, levels = unique(pca_df$group[order(pca_df$type)]))

pca_df <- pca_df[complete.cases(pca_df), ]

pca_df$type <- factor(pca_df$type, levels = c("natural", "artificial", "simulated"))

# table(pca_df$type, pca_df$group.size)
#get 3 names of group for each type level

set.seed(1)

selected_groups <- lapply(unique(pca_df$type), function(x) {
  unique(pca_df$group[pca_df$type == x & pca_df$group.size == 4])[1:3]
})


pca_df <- pca_df[pca_df$frame < 26, ]

ggplot(pca_df[pca_df$group %in% unlist(selected_groups), ], aes(x = frame, y = values, group = ind, color = type)) + 
  # geom_point() + 
  geom_smooth(se = FALSE, span = 0.3) + 
  theme_classic() + 
  # theme(legend.position = "none") + 
  labs(x = "Frame", y = "Horizontal plane\nposition (m)") +
  # ylim(c = c(-3, 3)) +
    facet_wrap( dir = "v",
        type~ group, #scales = "free_y",
        ncol = 3) +
  theme(
  strip.background = element_blank(),
  strip.text.x = element_blank()
) +
     scale_color_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6)

1.6.1 Raw distances

All group sizes combined:

Code
# add original data to simulated data and calculate proximity
nat_sim_dist_list <- lapply(sim_dists, function(x) {
  return(rbind(x, pairwise_dists))
})

# remove distances above 10 m
nat_sim_dist_list <- lapply(nat_sim_dist_list, function(x) {
  x <- x[x$distance < 10, ]
  x$type <-
    factor(x$type, levels = c("natural", "artificial", "simulated"))
  return(x)
})

dat_all <- dat <- nat_sim_dist_list[[1]]

dat_all$group.size <- "All sizes"

dat <- rbind(dat, dat_all)

dat$group.size.factor <-
  factor(dat$group.size, levels = c(1:10, "All sizes"))

n_grps <-
  aggregate(group ~ group.size.factor + type, dat, function(x)
    length(unique(x)))

n_grps$frames <-
  aggregate(group ~ group.size.factor + type, dat, function(x)
    length(x))$group


n_grps$indivs <-
  n_grps$group * as.numeric(as.character(n_grps$group.size.factor))


n_grps$group.size.factor <- factor(n_grps$group.size.factor)
n_grps$distance <-
  ifelse(n_grps$group.size.factor %in% 2:3,-1.2,-0.9)
n_grps$distance[1] <- -0.8

n_grps$indivs[n_grps$group.size.factor == "All sizes"] <-
  aggregate(indivs ~ type, n_grps, sum)$indivs

n_grps$n.labels <-
  paste0(n_grps$group, " / ", n_grps$indivs, " / ", n_grps$frames)
n_grps$n.labels[1] <-
  " n = 3 groups /\n 6 individuals /\n 149 frames"
n_grps$n.labels[6] <-
  " n = 18 groups /\n 74 individuals /\n 888 frames"
n_grps$distance[6] <- -0.3
n_grps$distance[c(11, 17)] <- -0.4

custom_labels <- unique(nat_sim_dist_list[[1]]$group.size)
custom_labels <-
  paste(c("Group size = ", rep("", 5)), custom_labels)
names(custom_labels) <- unique(nat_sim_dist_list[[1]]$group.size)



custom_labels <- unique(dat$group.size)
custom_labels <-
  paste(c("Group size = ", rep("", 5)), custom_labels)
names(custom_labels) <- unique(dat$group.size)

# mean distance by group type
agg_gp <-
  aggregate(distance ~ type + group + group.size.factor, dat, mean)

agg_gp$group.size.factor <- factor(agg_gp$group.size.factor)

# raincloud plot:
gg_all <- ggplot(dat[dat$group.size == "All sizes", ],
                 aes(
                   y = distance,
                   x = type,
                   color = type,
                   fill = type
                 )) +
  geom_signif(
    y_position = c(4.35, 4.75),
    xmin = c(1, 1),
    xmax = c(2, 3),
    annotation = c('beta:0.32~ (0.19 - 0.45)', 'beta:0.36~ (0.24 - 0.47)'), 
    parse = TRUE,
    tip_length = 0.02,
    textsize = 3,
    size = 0.5,
    color = "gray60",
    vjust = 0.2
  ) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
  geom_boxplot(
    fill = adjustcolor("orange", alpha.f = 0.1),
    data = agg_gp[!agg_gp$group.size %in% 2:5,],
    aes(y = distance, x = type),
    color = adjustcolor("orange", alpha.f = 0.6),
    width = .1,
    # remove outliers
    outlier.shape = NA
  ) +
  # add justified jitter from the {gghalves} package
  gghalves::geom_half_point(
    # color = fill_color,
    # draw jitter on the left
    side = "l",
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  # scale_color_viridis_d(option = "G", end = 0.8) +
  # scale_fill_viridis_d(option = "G",
  #                      end = 0.8,
  #                      alpha = 0.6) +
  scale_color_manual(values = graph_cols[2:4]) +
  scale_fill_manual(values = graph_cols_transp[2:4]) +
  gghalves::geom_half_point(
    data = agg_gp[agg_gp$group.size != "All sizes", ],
    aes(y = distance, x = type),
    color = "orange",
    # draw jitter on the left
    side = "l",
    pch = 20,
    size = 4,
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  ylim(c(-0.4, 4.8)) +
  scale_x_discrete(labels = c(
    "natural" = "Natural",
    "artificial" = "Artificial",
    "simulated" = "Simulated"
  )) +
  theme(legend.position = "none") +
  labs(x = "Flight type", y = "Distance (m)") +
  facet_wrap(~ group.size,
             ncol = 1,
             labeller = labeller(group.size = c("All sizes" = "All group sizes"))) +
  geom_text(lineheight = 0.8,
    data = n_grps[!n_grps$group.size.factor %in% 2:6, ],
    aes(y = distance, x = type, label = n.labels),
    color = "gray60",
    nudge_x = 0,
    size = 3
  )

# add significance
gg_all

Code
# raincloud plot:
gg_gs <- ggplot(dat[dat$group.size %in% 2:5,],
                aes(
                  y = distance,
                  x = type,
                  color = type,
                  fill = type
                )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.6,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  # add justified jitter from the {gghalves} package
  gghalves::geom_half_point(
    # color = fill_color,
    # draw jitter on the left
    side = "l",
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .3,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  # scale_color_viridis_d(option = "G", end = 0.8) +
  # scale_fill_viridis_d(option = "G",
  #                      end = 0.8,
  #                      alpha = 0.6) +
    scale_color_manual(values = graph_cols[2:4]) +
  scale_fill_manual(values = graph_cols_transp[2:4]) +
  gghalves::geom_half_point(
    data = agg_gp[agg_gp$group.size %in% 2:5, ],
    aes(y = distance, x = type),
    color = "orange",
    # draw jitter on the left
    side = "l",
    pch = 20,
    size = 4,
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
  geom_boxplot(
     fill = adjustcolor("orange", alpha.f = 0.1),
    data = agg_gp[agg_gp$group.size %in% 2:5, ],
    aes(y = distance, x = type),
    color = adjustcolor("orange", alpha.f = 0.6),
    width = .1,
    # remove outliers
    outlier.shape = NA
  ) +
  ylim(c(-1.2, 4.8)) +
  scale_x_discrete(labels = c(
    "natural" = "Natural",
    "artificial" = "Artificial",
    "simulated" = "Simulated"
  )) +
  theme(legend.position = "none") +
  facet_wrap(
    ~ group.size.factor,
    ncol = 2,
    labeller = labeller(group.size.factor = custom_labels)
  ) +
  labs(x = "Flight type", y = "Pairwise flight distance (m)") +
  geom_text(lineheight = 0.8,
    data = n_grps[n_grps$group.size.factor %in% 2:5, ],
    aes(y = distance, x = type, label = n.labels),
    color = "gray60",
    nudge_x = 0,
    size = 2.3
  )

# gg_gs

pg <-
  plot_grid(
    gg_gs + theme_classic(base_size = 15) + theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.title.x = element_blank()
    ),
    gg_all + theme_classic(base_size = 15) + theme(legend.position = "none", axis.title.y = element_blank()) + labs(x = ""),
    ncol = 2
  )



# Add a common x-axis label
pg <- ggdraw(pg) +
  draw_label(
    "Group type",
    x = 0.51,
    y = 0.02,
    vjust = 0,
    size = 15
  )


hedFont <- "Arial"
pg <- pg + 
  theme(plot.title = element_text(size = 20, family = hedFont, face = "bold"))
                                           
pg

Code
ggsave(
  "./output/flight_coordination_5_panels.png",
  pg,
  grDevices::png,
  width = 9,
  height = 4,
  dpi = 300
)

1.7 Regression model

  • Add the group roost entry data to each simulated set and run a bayesian mixed effects model coordination on roost entry for each data set

  • The mean pairwise spatial distance between individuals at each frame was used as the response variable, while group type (natural, artificial or simulated) was used as predictor

  • Group ID and group size were included as varying effects.

  • The posterior distributions from the 30 models were combined into a single model fit for statistical inference

Model:

\[ \text{mean pairwise distance} \sim \text{type} + \text{monotonic(group size)} + (1 \mid \text{group}) + \text{arma(frame)} \]

Code
mods_flight_coord <- brm_multiple(
  formula = distance ~ type + mo(group.size.factor) + ar(p = 2, time = frame, gr = group),
  iter = iter,
  thin = 1,
  data = nat_sim_dist_list,
  family = gaussian(),
  silent = 2,
  chains = chains,
  backend = "cmdstanr",
  # only works if cmdstanr package is installed
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)

custom_ppc(fit = mods_flight_coord[[1]], group = "type")

mods_flight_coord <- pblapply(mods_flight_coord,  function(x) add_criterion(x, criterion = c("loo")))

saveRDS(mods_flight_coord,
        "./data/processed/model_list_flight_coordination_monotonic.RDS")

null_mods_flight_coord <- brm_multiple(
  formula = distance ~ 1 + ar(p = 2, time = frame, gr = group),
  iter = iter,
  thin = 1,
  data = nat_sim_dist_list,
  family = gaussian(),
  silent = 2,
  chains = chains,
  backend = "cmdstanr",
  # only works if cmdstanr package is installed
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)

null_mods_flight_coord <- pblapply(null_mods_flight_coord,  function(x) add_criterion(x, criterion = c("loo")))

saveRDS(null_mods_flight_coord,
        "./data/processed/nullmodel_list_flight_coordination_monotonic.RDS")

1.7.1 Results

1.7.1.1 Model performance vs null model

Mean ELPD and associated standard deviation between each model and its correspondent null model:

Code
mods_flight_coord <- readRDS("./data/processed/model_list_flight_coordination_monotonic.RDS")
null_mods_flight_coord <- readRDS("./data/processed/nullmodel_list_flight_coordination_monotonic.RDS")

loo_diffs <- lapply(seq_along(mods_flight_coord), function(x) loo::loo_compare(mods_flight_coord[[x]],
    null_mods_flight_coord[[x]]))

loo_diff <- do.call(rbind, loo_diffs)

rows <- rownames(loo_diff)
loo_diff <- as.data.frame(loo_diff)

loo_diff$model <- rows

aggregate(cbind(elpd_diff, se_diff) ~ model, loo_diff, mean)
model elpd_diff se_diff
mods_flight_coord[[x]] 0.00000 0.000000
null_mods_flight_coord[[x]] -21.90503 7.252173

1.7.1.2 Model fit

Code
# average model
avrg_call <- paste0("posterior_average(", paste0(paste0("mods_flight_coord[[",
    1:length(mods_flight_coord), "]]"), collapse = ", "), ", weights = 'loo')")

average_model_draws <- eval(parse(text = avrg_call))

draw_extended_summary(average_model_draws, highlight = TRUE, fill = fill_color,
    beta.prefix = c("^b_", "^bsp_mo"))
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_Intercept 1.543 1.422 1.658 1 13408.43 12889.18
b_typeartificial 0.324 0.190 0.457 1 18885.33 14627.23
b_typesimulated 0.355 0.244 0.465 1 19059.53 14710.03
bsp_mogroup.size.factor 0.066 0.033 0.102 1 13809.64 14422.19

1.7.1.3 Contrasts

Code
contrasts <- draws_contrasts(draws = average_model_draws, predictor_name = "type",
    basal_level = "natural", fill_color = fill_color)

contrasts$contrasts
est l-95% CI u-95% CI
artificial - natural -0.324 -0.457 -0.190
simulated - natural -0.355 -0.465 -0.244
simulated - artificial -0.031 -0.162 0.098
Code
contrasts$plot + theme_classic()

1.7.1.4 Posterior predictive checks

Code
custom_ppc(readRDS("./data/processed/model_list_flight_coordination.RDS")[[1]],
    group = "type")

Takeaways

  • Flight coordination is higher in natural groups than in artificial and simulated groups
  • Flight coordination is similar in artificial and simulated groups
  • Flight distance between individuals increases with group size

2 Roost entry coordination

2.1 Data description

Code
dat <- as.data.frame(read_excel(path = "./data/raw/Anexo 1_Entrada a refugios-2022.xlsx"))

dat$entry.time <- as.numeric(dat$`Tiempo real`)
Warning: NAs introduced by coercion
Code
# remove missing data
dat <- dat[!is.na(dat$entry.time), ]

# rename treatment levels
dat$type <- dat$`Tipo grupo`

dat$type[dat$type == "Real"] <- "Natural"
dat$type[dat$type == "Mixto"] <- "Artificial"
dat$group <- dat$Grupo

Number of individuals per type:

Code
table(dat$type)

Artificial Individual    Natural 
        94         34        124 

Number of individuals per type removing missing data (NAs and Inf):

Code
(table(dat$type[!is.infinite(dat$entry.time) & !is.na(dat$entry.time)]))

Artificial Individual    Natural 
        65         17         95 

Individuals per experiment:

Code
ind_per_video <- table(dat$Video[dat$type != "Individual"])

table(ind_per_video)
ind_per_video
 2  3  4  5  6  7 
 8 16 14 17  1  1 

2.2 Estimating roost entry time lapse

Calculate time lapse between the first individual to enter the roost for each group:

Code
group_dat <- dat[!is.infinite(dat$entry.time) & !is.na(dat$entry.time) &
    dat$type != "Individual", ]

# get time difference between entries
group_dat_l <- lapply(unique(group_dat$group), function(x) {
    # print(x)
    X <- group_dat[group_dat$group == x, ]
    X <- X[!is.na(X$entry.time), ]
    X <- X[order(X$entry.time), ]
    # X$entry.time.diff <- X$entry.time - min(X$entry.time)
    X$entry.time.diff <- c(NA, X$entry.time[-1] - X$entry.time[-nrow(X)])
    X <- X[!is.na(X$entry.time.diff), ]

    # add 1 milisecond to 0
    X$entry.time.diff[X$entry.time.diff == 0] <- NA

    X$group.size <- if (nrow(X) > 0)
        nrow(X) - 1 else vector()
    return(X)
})

group_dat <- do.call(rbind, group_dat_l)

2.3 Simulating entry time lapse data

Simulated 30 data sets, each one with a number of simulated groups for each group size (2 to 5 individuals) similar to those in the natural group data. This approach allowed us to propagate uncertainty from random group simulations into our statistical inference.

Code
group_dat <- dat[!is.infinite(dat$entry.time) & !is.na(dat$entry.time) &
    dat$type != "Individual", ]


# get difference to first entry
group_dat_l <- lapply(unique(group_dat$group), function(x) {
    # print(x)
    X <- group_dat[group_dat$group == x, ]
    X <- X[!is.na(X$entry.time), ]
    X <- X[order(X$entry.time), ]
    # X$entry.time.diff <- X$entry.time - min(X$entry.time)
    X$entry.time.diff <- c(NA, X$entry.time[-1] - X$entry.time[-nrow(X)])
    X <- X[!is.na(X$entry.time.diff), ]

    X$group.size <- if (nrow(X) > 0)
        nrow(X) + 1 else vector()
    return(X)
})

group_dat <- do.call(rbind, group_dat_l)


group_dat$type <- factor(group_dat$type, levels = c("Artificial",
    "Natural", "Simulated"))

# make random groups from individual flights
indiv_dat <- dat[!is.infinite(dat$entry.time) & !is.na(dat$entry.time) &
    dat$type == "Individual", ]

indivs <- unique(indiv_dat$Individuo)

group_sizes <- group_dat$group.size[!duplicated(group_dat$Video)]

# use only group sizes 2:5
group_sizes <- group_sizes[group_sizes <= 5]

table(group_sizes)
group_sizes
2 3 4 5 
6 4 9 8 
Code
# simulate group entries
sim_groups_l <- pblapply(1:30, cl = 10, function(x) {
    # randomize order of distribution of individuals per
    # experiment
    set.seed(x)
    g_size <- sample((group_sizes), length(group_sizes)/2)

    # sampled_indivs <- sample(indivs, sum(g_size), replace =
    # TRUE)
    names(g_size) <- paste("sim", x, 1:length(g_size), sep = "-")

    # indivs_split <- split(sampled_indivs, f =
    # unlist(sapply(seq_along(g_size), function(x)
    # rep(names(g_size)[x], g_size[x]))))

    set.seed(x)
    indivs_split <- lapply(g_size, function(w) sample(indivs, w))

    sub_sim_groups_l <- lapply(1:length(indivs_split), function(y) {
        sim_group <- indiv_dat[indiv_dat$Individuo %in% indivs_split[[y]],
            ]
        sim_group$group <- names(g_size)[y]

        sim_group <- sim_group[!is.na(sim_group$entry.time), ]
        sim_group <- sim_group[order(sim_group$entry.time), ]
        # sim_group$entry.time.diff <- sim_group$entry.time -
        # min(sim_group$entry.time)
        sim_group$entry.time.diff <- c(NA, sim_group$entry.time[-1] -
            sim_group$entry.time[-nrow(sim_group)])
        sim_group <- sim_group[!is.na(sim_group$entry.time.diff),
            ]

        sim_group$entry.time.diff[sim_group$entry.time.diff > 300] <- 300
        # sim_group$entry.time.diff <- sim_group$entry.time -
        # min(sim_group$entry.time) sim_group <-
        # sim_group[-which.min(sim_group$entry.time.diff),]
        sim_group$group.size <- nrow(sim_group) + 1
        sim_group$type <- factor("Simulated")

        return(sim_group)
    })

    sub_sim_groups <- do.call(rbind, sub_sim_groups_l)

    # bind excluding first individual
    out <- rbind(sub_sim_groups, group_dat[group_dat$group.size %in%
        2:5 & group_dat$entry.time.diff > 0.001, ])

    return(out)
})

sim_group_n <- sapply(sim_groups_l, function(x) length(unique(x$group[x$type ==
    "Simulated"])))

all(sim_group_n == 4)
[1] FALSE
Code
sim_group_dat <- do.call(rbind, lapply(sim_groups_l, function(x) x[x$type ==
    "Simulated", ]))

sim_group_dat <- rbind(sim_group_dat, group_dat)


# also make group.size.factor
sim_groups_l <- lapply(sim_groups_l, function(x) {
    x$type <- factor(x$type, levels = c("Natural", "Artificial", "Simulated"))
    x$group.size.factor <- factor(x$group.size)
    return(x)
})

2.4 Plot raw data

All group sizes combined:

Code
# custom_labels <- unique(sim_groups_l[[3]]$group.size)
# custom_labels <- paste("group size = ", custom_labels)
# names(custom_labels) <- unique(sim_groups_l[[3]]$group.size)

# table(call_rate_by_group$group.size, call_rate_by_group$type)
cols <- graph_cols[2:4]
cols_transp <- graph_cols_transp[2:4]

names(cols) <- names(cols_transp) <- c("Natural", "Artificial", "Simulated")

# raincloud plot:
gg_entry <- ggplot(sim_groups_l[[1]],
       aes(
         y = entry.time.diff,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
  scale_color_manual(values = cols) +
  scale_fill_manual(values = cols_transp) +
        # scale_color_viridis_d(option = "G", end = 0.8) +
      # scale_fill_viridis_d(option = "G",
      #                      end = 0.8,
      #                      alpha = 0.6) +
      # ylim(c(-0.1, 4.8)) +
           scale_x_discrete(labels = c(
        "natural" = "Natural",
        "artificial" = "Artificial",
        "simulated" = "Simulated"
      )) +
      theme(legend.position = "none") +
      labs(x = "Group type", y = "Time difference (s)")
   
agg_grp_time <- aggregate(entry.time.diff ~ type + group, sim_groups_l[[1]], mean)

n_grps <-
  aggregate(group ~ type + group.size, sim_groups_l[[1]], function(x)
    length(unique(x)))

n_grps$indivs <-
  n_grps$group * n_grps$group.size

agg_grps <- aggregate(group ~ type, n_grps, sum)

agg_grps$indivs <- aggregate(indivs ~ type, n_grps, sum)$indivs

agg_grps$n.labels <-
  paste0(agg_grps$group, " / ", agg_grps$indivs)
agg_grps$n.labels[1] <-
  " n = 13 groups /\n 50 individuals"
agg_grps$distance <- -15


gg_entry <- gg_entry + ylim(c(-20, 210)) +
     geom_signif(
    y_position = c(170, 190, 210),
    xmin = c(1, 1, 2),
    xmax = c(3, 2, 3),
    annotation = c('beta:-1.71~ (-2.31 - -1.103)', 'beta:-0.99~ (-1.59 - -0.40)', 'beta:-0.72 ~ (-1.30 - -0.12)'), 
    parse = TRUE,
    tip_length = 0.02,
    textsize = 3,
    size = 0.5,
    color = "gray60",
    vjust = 0.2
  ) +
   gghalves::geom_half_point(
    data = agg_grp_time,
    aes(y = entry.time.diff, x = type),
    color = "orange",
    # draw jitter on the left
    side = "l",
    pch = 20,
    size = 4,
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) +
    geom_boxplot(
    fill = adjustcolor("orange", alpha.f = 0.1),
    data = agg_grp_time,
    aes(y = entry.time.diff, x = type),
    color = adjustcolor("orange", alpha.f = 0.6),
    width = .1,
    # remove outliers
    outlier.shape = NA
  ) +
  geom_text(lineheight = 0.8,
    data = agg_grps,
    aes(y = distance, x = type, label = n.labels),
    color = "gray60",
    nudge_x = 0,
    size = 4
  ) + theme_classic(base_size = 12) + theme(legend.position = "none")

ggsave(
  "./output/roost_entry_coordination_1_panel.png",
  gg_entry,
  grDevices::png,
  width = 4.5,
  height = 3.5,
  dpi = 300
)

By group size:

Code
# raincloud plot:
ggplot(sim_groups_l[[1]],
       aes(
         y = entry.time.diff,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_color_viridis_d(option = "G", end = 0.8) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
      # ylim(c(-0.1, 4.8)) +
           scale_x_discrete(labels = c(
        "natural" = "Natural",
        "artificial" = "Artificial",
        "simulated" = "Simulated"
      )) +
      theme(legend.position = "none") +
      facet_wrap(
        ~ group.size.factor,
        ncol = 2,
        labeller = labeller(group.size.factor = custom_labels)
      ) +
      labs(x = "Flight type", y = "Time difference (s)")

2.5 Regression model

  • Add the group roost entry data to each simulated set and run a bayesian mixed effects model coordination on roost entry for each data set

  • The difference in time between when entering the roost between consecutive individuals was used as the response variable (modeled with a normal distribution), while group type (natural, artificial or simulated) was used as predictor

  • Group ID was included as varying effect

  • The posterior distributions from the 30 models were combined into a single model fit for statistical inference

Model:

\[ \text{log(entry time difference + 1)} \sim \text{type} + \text{monotonic(group size)} + (\text{1} \mid \text{group}) \]

Code
entry_mod_list <- brm_multiple(
  formula = log(entry.time.diff + 1) ~ type + mo(group.size) + (1 | group),
  iter = iter,
  thin = 1,
  data = sim_groups_l,
  family = gaussian(),
  silent = 2,
  chains = chains,
  backend = "cmdstanr",
  # only works if cmdstanr package is installed
  threads = threading(2),
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)

custom_ppc(fit = entry_mod_list[[1]], group = "type")

entry_mod_list <- pblapply(entry_mod_list,  function(x) add_criterion(x, criterion = c("loo")))

saveRDS(entry_mod_list,
        "./data/processed/model_list_roost_entry_time_regression_monotonic.RDS")

# run null model
null_entry_mod_list <- brm_multiple(
  formula = log(entry.time.diff + 1) ~ 1 + (1 | group),
  iter = iter,
  thin = 1,
  data = sim_groups_l,
  family = gaussian(),
  silent = 2,
  chains = chains,
  backend = "cmdstanr",
  # only works if cmdstanr package is installed
  threads = threading(2),
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)

null_entry_mod_list <- pblapply(null_entry_mod_list,  function(x) add_criterion(x, criterion = c("loo")))

# loo_compare(entry_mod_list[[1]], null_entry_mod_list[[1]])

saveRDS(null_entry_mod_list,
        "./data/processed/null_model_list_roost_entry_time_regression_monotonic.RDS")

2.5.1 Results

2.5.1.1 Model performance vs null model

Code
model_list_roost_entry_time_regression <- readRDS("./data/processed/model_list_roost_entry_time_regression_monotonic.RDS")

null_model_list_roost_entry_time_regression <- readRDS("./data/processed/null_model_list_roost_entry_time_regression_monotonic.RDS")

loo_diffs <- lapply(seq_along(model_list_roost_entry_time_regression),
    function(x) loo::loo_compare(model_list_roost_entry_time_regression[[x]],
        null_model_list_roost_entry_time_regression[[x]]))

loo_diff <- do.call(rbind, loo_diffs)

rows <- rownames(loo_diff)
loo_diff <- as.data.frame(loo_diff)

loo_diff$model <- rows

aggregate(cbind(elpd_diff, se_diff) ~ model, loo_diff, mean)
model elpd_diff se_diff
model_list_roost_entry_time_regression[[x]] 0.00000 0.000000
null_model_list_roost_entry_time_regression[[x]] -14.07826 4.578788

2.5.1.2 Model fit

Code
# average model
avrg_call <- paste0("posterior_average(", paste0(paste0("model_list_roost_entry_time_regression[[",
    1:length(model_list_roost_entry_time_regression), "]]"), collapse = ", "),
    ", weights = 'loo')")

average_model_draws <- eval(parse(text = avrg_call))

draw_extended_summary(average_model_draws, highlight = TRUE, fill = fill_color,
    beta.prefix = c("^b_", "^bsp_mo"))
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_Intercept 2.638 1.861 3.439 1 8933.489 12093.30
b_typeArtificial 0.990 0.405 1.585 1 14848.445 14559.90
b_typeSimulated 1.703 1.103 2.311 1 11169.393 14767.39
bsp_mogroup.size -0.331 -0.576 -0.078 1 9756.350 12736.37

2.5.1.3 Contrasts

Code
contrasts <- draws_contrasts(average_model_draws, predictor_name = "type",
    basal_level = "Natural", fill_color = fill_color)

contrasts$contrasts
est l-95% CI u-95% CI
Artificial - Natural -0.990 -1.585 -0.405
Simulated - Natural -1.705 -2.311 -1.103
Simulated - Artificial -0.715 -1.299 -0.121
Code
contrasts$plot + theme_classic()

2.5.1.4 Posterior predictive checks

Code
custom_ppc(fit = model_list_roost_entry_time_regression[[1]], group = "type")

Takeaways

  • Natural groups enter more coordinated than simulated and artificial groups
  • Artificial groups enter more coordinated than simulated groups
  • Entry time difference decreases with group size

3 Calling behavior

3.1 Read data

Code
clls <- readRDS("./data/processed/curated_extended_selection_table_inquiry_calls_2020_&_2021.RDS")

sub_clls <- clls[!duplicated(clls$org.sound.files), ]
sub_clls$file.duration <- sub_clls$sound.file.samples/(sub_clls$sample.rate *
    1000)
# metadat2 <-
# read.csv('./data/processed/metadata_inquiry_calls_2020_&_2021.csv',
# stringsAsFactors = FALSE)

metadat <- as.data.frame(read_excel("./data/raw/Anexo 1_Proyecto MPI enero 20-21.xlsx"))
names(metadat) <- gsub(" ", ".", names(metadat))
metadat$year <- substr(metadat$Día, 0, 4)

metadat$year[is.na(metadat$year)] <- "2021"
metadat <- metadat[metadat$tipo.de.video != "calibracion de video",
    ]

# audio solamente para identificacion de sonidos. TASA NO
metadat <- metadat[metadat$Audio != "60 y 61", ]

metadat$year.audio <- paste(metadat$year, metadat$Audio, sep = "-")

caps <- as.data.frame(read_excel("./data/raw/Proyecto MPI enero 2020_2.xlsx",
    sheet = "Capturas"))

# read acoustic parameter data
acous_param_l <- readRDS("./data/processed/acoustic_parameters_all_groups_specific_warbler_acoustic_measurements_curated_data_2020_&_2021.RDS")

# read as RDS
agg_pred <- readRDS("./data/processed/predicted_individual_in_group_flights_2020_&_2021.RDS")

# read diagnostics
diagnostics <- readRDS("./data/processed/random_forests_diagnostics_solo_flight.RDS")

3.2 Call rate

3.2.1 Individual in solo flight vs overall group flight

Code
call_rate_by_group <- readRDS("./data/processed/call_rate_by_group.RDS")

# remove those in which only one individual called
call_id_seq <- read.csv("./data/processed/group_call_individual_id_sequence.csv")

call_id_seq <- call_id_seq[order(call_id_seq$event, call_id_seq$start),
    ]

call_id_seq_list <- split(call_id_seq, call_id_seq$event)

# get diversity and entropy
unique_callers_list <- pblapply(call_id_seq_list, function(x) {
    x <- x[order(x$start), ]
    id_seq <- x$indiv
    id_seq <- as.character(as.numeric(factor(id_seq)))
    unique.callers <- length(unique(id_seq))

    out <- data.frame(group = x$group[1], unique.callers = unique.callers)
    return(out)
})

unique_callers_df <- do_call(rbind, unique_callers_list)

call_rate_by_group <- call_rate_by_group[!call_rate_by_group$group %in%
    unique_callers_df$group[unique_callers_df$unique.callers == 1],
    ]

call_rate_by_group$type <- ifelse(call_rate_by_group$type == "solo",
    "solo", paste(call_rate_by_group$experiment, call_rate_by_group$type,
        sep = "-"))

call_rate_by_group$type <- gsub("mixed.group", "artificial.group",
    call_rate_by_group$type)
call_rate_by_group$type <- gsub("regular.group", "natural.group",
    call_rate_by_group$type)

call_rate_by_group$type <- factor(call_rate_by_group$type, levels = c("solo",
    "natural.group", "artificial.group"))

# mean centering group size
call_rate_by_group$group.size.f <- factor(call_rate_by_group$group.size,
    ordered = TRUE)

3.2.1.1 Plot raw data

All group sizes combined:

Code
# table(call_rate_by_group$group.size, call_rate_by_group$type)
cols <- graph_cols[1:3]
cols_transp <- graph_cols_transp[1:3]

names(cols) <- names(cols_transp) <- c("solo", "natural.group", "artificial.group")

# raincloud plot:
  ggrate_indv_grp <- ggplot(call_rate_by_group,
       aes(
         y = rate,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      # scale_color_viridis_d(option = "G", begin = 0.2, end = 0.8) +
      # scale_fill_viridis_d(option = "G",
      #                      end = 0.8,
      #                      alpha = 0.6) +
      scale_color_manual(values = cols) +
  scale_fill_manual(values = cols_transp) +
      ylim(c(-0.1, 60)) +
           scale_x_discrete(labels = c(
        "solo" = "Solo",
        "natural.group" = "Natural group",
        "artificial.group" = "Artificial group"
      )) +
      theme(legend.position = "none") +
      labs(x = "Flight type", y = "Calling rate (calls / min)")
    

length(unique(call_rate_by_group$group))
[1] 69
Code
n_grps <-
  aggregate(group ~ type, call_rate_by_group, function(x)
    length(unique(x)))

n_grps$group <- sapply(n_grps$type, function(x) length(unique(call_rate_by_group$group[call_rate_by_group$type == x & call_rate_by_group$indiv == "group"])))

n_grps$group[1] <- length(unique(call_rate_by_group$group[call_rate_by_group$type == "solo" & call_rate_by_group$experiment == "regular"]))

n_grps$indivs <- 0


n_grps$indivs[1] <- length(unique(call_rate_by_group$indiv[call_rate_by_group$type == "solo"]))

n_grps$distance <- -4

n_grps$n.labels <-
  paste0(n_grps$group, " / ", n_grps$indivs)
n_grps$n.labels <- c(
  "n = 131 individuals\nfrom 29 groups",
  "32 groups",
  "37 groups")

ggrate_indv_grp_pimp <- ggrate_indv_grp +
  scale_x_discrete(labels = c(
        "solo" = "Single individual",
        "natural.group" = "Natural group",
        "artificial.group" = "Artificial group"
      )) +
    geom_signif(
    y_position = c(61, 68),
    xmin = c(1, 2),
    xmax = c(3, 3),
    annotation = c('beta:-0.41~ (-0.74 - -0.089)', 'beta:-0.64 ~ (-1.05 - -0.23)'), 
    parse = TRUE,
    tip_length = 0.02,
    textsize = 3,
    size = 0.5,
    color = "gray60",
    vjust = 0.2
  ) +
  ylim(c(-5, 68)) +
  theme(axis.title.x = element_blank()) +
   geom_text(lineheight = 0.8,
    data = n_grps,
    aes(y = distance, x = type, label = n.labels),
    color = "gray60",
    nudge_x = 0,
    size = 3
  )

By group size:

Code
custom_labels <- unique(call_rate_by_group$group.size)
custom_labels <- paste("group size = ", custom_labels)
names(custom_labels) <- unique(call_rate_by_group$group.size)

# raincloud plot:
ggplot(call_rate_by_group[call_rate_by_group$group.size <= 5,],
       aes(
         y = rate,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_color_viridis_d(option = "G", end = 0.8) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
      ylim(c(-0.1, 60)) +
             scale_x_discrete(labels = c(
        "solo" = "Solo",
        "natural.group" = "Natural group",
        "artificial.group" = "Artificial group"
      )) +
      theme(legend.position = "none") +
      labs(x = "Flight type", y = "Calling rate (calls / min)") +
      facet_wrap(
        ~ group.size.f,
        ncol = 2,
        labeller = labeller(group.size.f = custom_labels)
      ) 

3.2.1.2 Regression model

Model:

\[ \text{call rate} \sim \text{type} + (1 \mid \text{group}) + \text{monotonic(group size)} + (\text{type} \mid \text{group}) \]

Response modeled with a negative binomial distribution.

Code
mod <- brm(formula = calls | resp_rate(flight.time) ~ type + mo(group.size.f) +
    (type | group), iter = iter, thin = 1, data = call_rate_by_group,
    family = negbinomial(), silent = 2, chains = chains, cores = chains,
    save_pars = save_pars(all = TRUE), control = list(adapt_delta = 0.99,
        max_treedepth = 15), file = "./data/processed/individual_vs_group_call_rate_group_size_monot",
    file_refit = "always")

# custom_ppc(fit = mod, group = 'type')

mod <- add_criterion(mod, criterion = c("loo"))

null_mod <- brm(formula = calls | resp_rate(flight.time) ~ 1 + (1 |
    group), iter = iter, thin = 1, data = call_rate_by_group, family = negbinomial(),
    silent = 2, chains = chains, cores = chains, save_pars = save_pars(all = TRUE),
    control = list(adapt_delta = 0.99, max_treedepth = 15), file = "./data/processed/null_mod_individual_vs_group_call_rate_group_size",
    file_refit = "always")

null_mod <- add_criterion(null_mod, criterion = c("loo"))

beepr::beep(2)

3.2.1.3 Results

3.2.1.3.1 Model performance vs null model
Code
mod <- readRDS("./data/processed/individual_vs_group_call_rate_group_size_monot.rds")

null_mod <- readRDS("./data/processed/null_mod_individual_vs_group_call_rate_group_size.rds")

loo_diff <- loo::loo_compare(mod, null_mod)

loo_diff
         elpd_diff se_diff
mod        0.0       0.0  
null_mod -27.4      11.0  
3.2.1.3.2 Model fit
Code
extended_summary(fit = mod, gsub.pattern = "type", gsub.replacement = "solo_vs_",
    remove.intercepts = TRUE, highlight = TRUE, print.name = FALSE,
    trace.palette = viridis::mako, fill = fill_color, beta.prefix = c("^b_",
        "^bsp_mo"))
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 Intercept-student_t(3, 3.3, 2.5) L-lkj_corr_cholesky(1) sd-student_t(3, 0, 2.5) shape-inv_gamma(0.4, 0.3) simo-dirichlet(1) calls | resp_rate(flight.time) ~ type + mo(group.size.f) + (type | group) 10000 4 1 5000 0 (0%) 0 6836.813 10966.32 1764324928
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_solo_vs_natural.group 0.226 -0.040 0.485 1 15615.687 14364.50
b_solo_vs_artificial.group -0.412 -0.743 -0.089 1 11809.438 13918.76
bsp_mogroup.size.f 0.008 -0.087 0.096 1 6836.813 10966.32

3.2.1.3.3 Contrasts
Code
# contrasts
contrasts(fit = mod, predictor = "type", n.posterior = 2000, level.sep = " VS ",
    html.table = TRUE, plot = TRUE, highlight = TRUE, fill = fill_color)
Contrasts Estimate Est.Error l-95% CI u-95% CI
1 solo VS natural.group -0.226 0.133 0.040 -0.485
2 solo VS artificial.group 0.412 0.165 0.743 0.089
3 natural.group VS artificial.group 0.637 0.211 0.225 1.052

3.2.1.3.4 Posterior predictive checks
Code
custom_ppc(fit = mod, group = "type")

Takeaways
  • No difference in call rate between solo individuals and natural groups
  • Artificial groups produce lower call rates than solo individuals and natural groups
  • No effect of group size

3.2.2 Individual in solo flight vs individuals in group flight

Code
call_rate_by_group <- readRDS("./data/processed/call_rate_by_group.RDS")

# remove those in which only one individual called
call_id_seq <- read.csv("./data/processed/group_call_individual_id_sequence.csv")

call_id_seq <- call_id_seq[order(call_id_seq$event, call_id_seq$start),
    ]

call_id_seq_list <- split(call_id_seq, call_id_seq$event)

# get diversity and entropy
unique_callers_list <- pblapply(call_id_seq_list, function(x) {
    x <- x[order(x$start), ]
    id_seq <- x$indiv
    id_seq <- as.character(as.numeric(factor(id_seq)))
    unique.callers <- length(unique(id_seq))

    out <- data.frame(group = x$group[1], unique.callers = unique.callers)
    return(out)
})

unique_callers_df <- do_call(rbind, unique_callers_list)

call_rate_by_group <- call_rate_by_group[!call_rate_by_group$group %in%
    unique_callers_df$group[unique_callers_df$unique.callers == 1],
    ]


call_rate_indiv <- readRDS("./data/processed/call_rate_by_individual.RDS")

call_rate_indiv <- call_rate_indiv[!call_rate_indiv$group %in% unique_callers_df$group[unique_callers_df$unique.callers ==
    1], ]


call_rate_by_group <- call_rate_by_group[call_rate_by_group$type !=
    "solo", ]
call_rate_by_group$type <- "overall.group"

call_rate_indiv$type <- as.character(call_rate_indiv$type)
call_rate_indiv$type[call_rate_indiv$type == "group"] <- "indiv.group"

call_rate <- rbind(call_rate_by_group[, intersect(names(call_rate_by_group),
    names(call_rate_indiv))], call_rate_indiv[, intersect(names(call_rate_by_group),
    names(call_rate_indiv))])

# aggregate for plot
agg_rate <- aggregate(rate ~ experiment + type + group.size, data = call_rate,
    FUN = mean)

agg_rate$sd <- aggregate(rate ~ experiment + type + group.size, data = call_rate,
    FUN = sd)$rate

agg_rate$type <- factor(agg_rate$type, levels = c("solo", "indiv.group",
    "overall.group"))
Code
call_rate_indiv <- call_rate_indiv[!is.na(call_rate_indiv$rate) &
    call_rate_indiv$rate > 0, ]

no_solo <- call_rate_indiv[call_rate_indiv$type != "solo", ]

agg <- aggregate(indiv ~ group + experiment, data = no_solo, function(x) length(unique(x)))

agg$group.size <- aggregate(group.size ~ group + experiment, data = no_solo,
    function(x) mean(x))$group.size

agg$prop.callers <- agg$indiv/agg$group.size
# t.test(agg$prop.callers ~ agg$experiment)
# aggregate(prop.callers ~ experiment, agg, mean)
# aggregate(prop.callers ~ experiment, agg, sd)

In average 0.81 (+/- 0.2 SD) individuals in group flights called

3.2.2.1 Plot raw data

All group sizes combined:

Code
call_rate_indiv <- call_rate_indiv[!is.na(call_rate_indiv$rate) & call_rate_indiv$rate > 0, ]

if (!is.factor(call_rate_indiv$type)){
call_rate_indiv$type <- ifelse(call_rate_indiv$type == "solo", "solo", paste(call_rate_indiv$experiment, call_rate_indiv$type, sep = "-"))

call_rate_indiv$type <- gsub("mixed-indiv.group", "artificial.group", call_rate_indiv$type)
call_rate_indiv$type <- gsub("regular-indiv.group", "natural.group", call_rate_indiv$type)

call_rate_indiv$type <- factor(call_rate_indiv$type, levels = c("solo", "natural.group", "artificial.group")) 
}

# table(call_rate_by_group$group.size, call_rate_by_group$type)
cols <- graph_cols[1:3]
cols_transp <- graph_cols_transp[1:3]

names(cols) <- names(cols_transp) <- c("solo", "natural.group", "artificial.group")


# raincloud plot:
gg_rate_indiv <- ggplot(call_rate_indiv,
       aes(
         y = rate,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      # scale_color_viridis_d(option = "G", end = 0.8) +
      # scale_fill_viridis_d(option = "G",
      #                      end = 0.8,
      #                      alpha = 0.6) +
     scale_color_manual(values = cols) +
  scale_fill_manual(values = cols_transp) +
      ylim(c(-0.1, 40)) +
      theme(legend.position = "none") +
      labs(x = "Type", y = "Calling rate (calls / min)") +
  scale_x_discrete(labels = c(
        "solo" = "Solo",
        "natural.group" = "Natural group",
        "artificial.group" = "Artificial group"
      ))
Code
n_grps <-
  aggregate(group ~ type, call_rate_indiv, function(x)
    length(unique(x)))

n_grps$indivs <- sapply(n_grps$type, function(x) length(unique(call_rate_indiv$indiv[call_rate_indiv$type == x])))

n_grps$group[1] <- length(unique(call_rate_indiv$group[call_rate_indiv$type == "solo" & call_rate_indiv$experiment == "regular"]))

n_grps$indivs[1] <- length(unique(call_rate_indiv$indiv[call_rate_indiv$type == "solo" & call_rate_indiv$experiment == "regular"]))

n_grps$distance <- -2.5

n_grps$n.labels <-
  paste0(n_grps$group, " / ", n_grps$indivs)
n_grps$n.labels <- c(
  "n = 81 individuals\nfrom 23 groups",
  "63 individuals\n21 groups",
  "44 individuals\n21 groups")

agg_rate <- aggregate(rate ~ type + group, data = call_rate_indiv, FUN = mean)


# sapply(agg_rate$group, function(x) if(any(agg_rate$type == "natural.group")) "natural" else "artificial")

# keep only those solo from natural groups
solo_keep <- agg_rate[agg_rate$type == "natural.group", ]

solo_keep <- solo_keep$group[solo_keep$group %in% agg_rate$group[agg_rate$type == "solo"]]

agg_rate <- agg_rate[(agg_rate$group %in% solo_keep & agg_rate$type == "solo") | agg_rate$type != "solo", ]


gg_rate_indiv_pimp <- gg_rate_indiv + 
  ylim(c(-3, 35)) +
  geom_text(lineheight = 0.8,
    data = n_grps,
    aes(y = distance, x = type, label = n.labels),
    color = "gray60",
    nudge_x = 0,
    size = 3
  ) +
  geom_signif(
    y_position = c(30, 34),
    xmin = c(1, 1),
    xmax = c(2, 3),
    annotation = c('beta:-1.60~ (-1.87 - -1.33)', 'beta:-1.31 ~ (-1.55 - -1.07)'), 
    parse = TRUE,
    tip_length = 0.02,
    textsize = 3,
    size = 0.5,
    color = "gray60",
    vjust = 0.2
  ) +
  geom_boxplot(
    fill = adjustcolor("orange", alpha.f = 0.1),
    data = agg_rate,
    aes(
         y = rate,
         x = type
       ),
    color = adjustcolor("orange", alpha.f = 0.6),
    width = .1,
    # remove outliers
    outlier.shape = NA
  ) +
   gghalves::geom_half_point(
    data = agg_rate,
    aes(y = rate, x = type),
    color = "orange",
    # draw jitter on the left
    side = "l",
    pch = 20,
    size = 4,
    # control range of jitter
    range_scale = .4,
    # add some transparency
    alpha = .5,
    transformation = ggplot2::position_jitter(height = 0)
  ) + 
  scale_x_discrete(labels = c(
        "solo" = "Single individual",
        "natural.group" = "Individual in\n natural group",
        "artificial.group" = "Individual in\nartificial group"
      )) +
  theme(
  axis.title.x = element_blank())

gg_rate_indiv_pimp

By group size:

Code
# raincloud plot:
ggplot(call_rate_indiv,
       aes(
         y = rate,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_color_viridis_d(option = "G", end = 0.8) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
      ylim(c(-0.1, 40)) +
      theme(legend.position = "none") +
      labs(x = "Type", y = "Rate (calls / min)") + 
      facet_wrap(
        ~ group.size,
        ncol = 2,
        labeller = labeller(group.size = custom_labels)
      ) +
   scale_x_discrete(labels = c(
        "solo" = "Solo",
        "natural.group" = "Natural group",
        "artificial.group" = "Artificial group"
      ))

3.2.2.2 Regression model

\[ \text{call rate} \sim \text{type} + (1 \mid \text{group}) + \text{monotonic(group size)} + (1 \mid \text{individual}) \]

Response modeled with a log-normal distribution.

Code
mod <- brm(formula = calls | resp_rate(flight.time) ~ type + mo(group.size.f) +
    (1 | indiv), iter = iter, thin = 1, data = call_rate_indiv, family = negbinomial(),
    silent = 2, chains = chains, cores = chains, control = list(adapt_delta = 0.99,
        max_treedepth = 15), file = "./data/processed/individual_call_rate_solo_vs_group_group_size_monot_negbinomial",
    file_refit = "always")

mod <- add_criterion(mod, criterion = "loo")

null_mod <- brm(formula = calls | resp_rate(flight.time) ~ 1 + (1 |
    indiv), iter = iter, thin = 1, data = call_rate_indiv, family = negbinomial(),
    silent = 2, chains = chains, cores = chains, control = list(adapt_delta = 0.99,
        max_treedepth = 15), file = "./data/processed/null_model_individual_call_rate_solo_vs_group_group_size_monot_negbinomial",
    file_refit = "always")

null_mod <- add_criterion(null_mod, criterion = "loo")
3.2.2.2.1 Results
3.2.2.2.1.1 Model performance vs null model
Code
mod <- readRDS("./data/processed/individual_call_rate_solo_vs_group_group_size_monot.rds")
null_mod <- readRDS("./data/processed/null_model_individual_call_rate_solo_vs_group_group_size_monot.rds")

loo_compare(mod, null_mod)
         elpd_diff se_diff
mod        0.0       0.0  
null_mod -79.4      10.4  
3.2.2.2.1.2 Model fit
Code
extended_summary(fit = mod, gsub.pattern = "type", gsub.replacement = "solo_vs_",
    remove.intercepts = TRUE, highlight = TRUE, print.name = FALSE,
    trace.palette = viridis::mako, fill = fill_color, beta.prefix = c("^b_",
        "^bsp_mo"))
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 Intercept-student_t(3, 1.9, 2.5) sd-student_t(3, 0, 2.5) sigma-student_t(3, 0, 2.5) simo-dirichlet(1) rate ~ type + mo(group.size.f) + (1 | indiv) 10000 4 1 5000 0 (0%) 0 10027.95 13624.49 2138101156
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_solo_vs_natural.group -1.308 -1.550 -1.067 1 29407.61 16023.32
b_solo_vs_artificial.group -1.601 -1.873 -1.329 1 29640.05 15070.35
bsp_mogroup.size.f -0.093 -0.211 0.013 1 10027.95 13624.49

3.2.2.2.1.3 Contrasts
Code
# contrasts
contrasts(fit = mod, predictor = "type", n.posterior = 2000, level.sep = " VS ",
    html.table = TRUE, plot = TRUE, highlight = TRUE, fill = fill_color)
Contrasts Estimate Est.Error l-95% CI u-95% CI
1 natural.group VS solo -1.308 0.122 -1.550 -1.067
2 natural.group VS artificial.group 0.293 0.167 -0.033 0.623
3 solo VS artificial.group 1.601 0.139 1.873 1.329

3.2.2.2.1.4 Posterior predictive checks
Code
custom_ppc(fit = mod, group = "type")

Takeaways
  • Individuals in natural and artificial groups produce lower call rates than in solo flights
  • No difference in call rate between natural and artificial groups
  • No effect of group size

3.2.3 Individual consistency in call rate across flight types

3.2.3.1 Plot raw data

All group sizes combined:

Code
call_rate_indiv <- readRDS("./data/processed/call_rate_by_individual.RDS")

call_rate_indiv <- call_rate_indiv[!call_rate_indiv$group %in% unique_callers_df$group[unique_callers_df$unique.callers ==
    1], ]


call_rate_by_group <- call_rate_by_group[call_rate_by_group$type !=
    "solo", ]
call_rate_by_group$type <- "overall.group"

call_rate_indiv$type <- as.character(call_rate_indiv$type)
call_rate_indiv$type[call_rate_indiv$type == "group"] <- "indiv.group"

call_rate_indiv <- call_rate_indiv[!is.na(call_rate_indiv$rate) &
    call_rate_indiv$rate > 0, ]

call_rate_indiv_cov_l <- lapply(call_rate_indiv$indiv, function(x) {
    Y <- call_rate_indiv[call_rate_indiv$indiv == x, ]

    out2 <- lapply(unique(Y$group), function(y) {
        Y2 <- Y[Y$group == y, ]
        res <- as.data.frame(t(unstack(Y2[, c("rate", "type")])))
        res$group <- y
        res$experiment <- Y2$experiment[1]
        res$group.size <- Y2$group.size[1]
        return(res)
    })
    out2 <- out2[sapply(out2, ncol) == 5]

    rate_df <- do.call(rbind, out2)
    rate_df$indiv <- x

    return(rate_df)

})


call_rate_indiv_cov <- do.call(rbind, call_rate_indiv_cov_l[sapply(call_rate_indiv_cov_l,
    class) == "data.frame"])

call_rate_indiv_cov$experiment <- ifelse(call_rate_indiv_cov$experiment ==
    "mixed", "artificial", "natural")

# make group size a factor
call_rate_indiv_cov$group.size.f <- factor(call_rate_indiv_cov$group.size,
    ordered = TRUE)


ggplot(call_rate_indiv_cov, aes(y = indiv.group, x = solo, color = experiment)) +
    geom_point() + scale_color_viridis_d(option = "G", end = 0.8) +
    ylim(c(-0.1, 40)) + xlim(c(-0.1, 40)) + theme(legend.position = "right") +
    labs(x = "Type", y = "Rate (calls / min)")

By group size:

Code
ggplot(call_rate_indiv_cov, aes(y = indiv.group, x = solo, color = experiment)) +
    geom_point() + scale_color_viridis_d(option = "G", end = 0.8) +
    ylim(c(-0.1, 40)) + xlim(c(-0.1, 40)) + theme(legend.position = "right") +
    labs(x = "Type", y = "Rate (calls / min)") + facet_wrap(~group.size.f,
    ncol = 2, labeller = labeller(group.size.f = custom_labels))

3.2.3.2 Regression model

\[ \text{indiv call rate in group} \sim \text{solo call rate} + \text{experiment} + (1 \mid \text{group}) + \text{monotonic(group size)} + (1 \mid \text{individual}) \]

Code
mod <- brm(formula = indiv.group ~ solo + experiment + mo(group.size.f) +
    (1 | indiv), iter = iter, thin = 1, data = call_rate_indiv_cov,
    family = gaussian(), silent = 2, chains = chains, cores = chains,
    control = list(adapt_delta = 0.99, max_treedepth = 15), file = "./data/processed/individual_call_rate_solo_vs_group_covariation_group_size_monot_negbinomial",
    file_refit = "always")

mod <- add_criterion(mod, criterion = "loo")

mod_inter <- brm(formula = indiv.group ~ solo * experiment + mo(group.size.f) +
    (1 | indiv), iter = iter, thin = 1, data = call_rate_indiv_cov,
    family = gaussian(), silent = 2, chains = chains, cores = chains,
    control = list(adapt_delta = 0.99, max_treedepth = 15), file = "./data/processed/individual_call_rate_solo_vs_group_covariation_group_size_monot_negbinomial_interaction",
    file_refit = "always")

mod_inter <- add_criterion(mod_inter, criterion = "loo")

null_mod <- brm(formula = indiv.group ~ 1 + mo(group.size.f) + (1 |
    indiv), iter = iter, thin = 1, data = call_rate_indiv_cov, family = gaussian(),
    silent = 2, chains = chains, cores = chains, control = list(adapt_delta = 0.99,
        max_treedepth = 15), file = "./data/processed/null_model_individual_call_rate_solo_vs_group_covariation_group_size_monot_negbinomial",
    file_refit = "always")

null_mod <- add_criterion(null_mod, criterion = "loo")
3.2.3.2.1 Results
3.2.3.2.1.1 Model performance vs null model
Code
mod <- readRDS("./data/processed/individual_call_rate_solo_vs_group_covariation_group_size_monot_negbinomial.rds")
mod_inter <- readRDS("./data/processed/individual_call_rate_solo_vs_group_covariation_group_size_monot_negbinomial_interaction.rds")
null_mod <- readRDS("./data/processed/null_model_individual_call_rate_solo_vs_group_covariation_group_size_monot_negbinomial.rds")

loo_compare(mod, mod_inter, null_mod)
          elpd_diff se_diff
mod        0.0       0.0   
mod_inter -0.8       1.6   
null_mod  -1.1       1.5   
3.2.3.2.1.2 Model fit
Code
extended_summary(
  fit = mod,

  remove.intercepts = TRUE,
  highlight = TRUE,
  print.name = FALSE,
  trace.palette = viridis::mako, 
  fill = fill_color,
  beta.prefix = c("^b_", "^bsp_mo")
)
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 Intercept-student_t(3, 2.2, 2.5) sd-student_t(3, 0, 2.5) sigma-student_t(3, 0, 2.5) simo-dirichlet(1) indiv.group ~ solo + experiment + mo(group.size.f) + (1 | indiv) 10000 4 1 5000 0 (0%) 0 6969.194 7450.404 734280391
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_solo 0.113 0.014 0.212 1.001 6969.194 10272.338
b_experimentnatural 0.605 -0.818 2.041 1 14164.781 15121.912
bsp_mogroup.size.f -0.586 -1.400 0.047 1 7170.390 7450.404

3.2.3.2.1.3 Posterior predictive checks
Code
custom_ppc(fit = mod, group = "type")

Takeaways
  • Individual call rate is positively associated between solo and group flights
  • Association of call rate between solo and group flights is similar between natural and artificial groups
  • No effect of group size

3.3 Simulated group calling

Code
time_calls_indiv <- readRDS("./data/processed/time_position_calls_by_individual.RDS")

# keep only 120 s duration flights
time_calls_indiv <- time_calls_indiv[time_calls_indiv$flight.time ==
    120, ]
time_calls_indiv <- time_calls_indiv[complete.cases(time_calls_indiv),
    ]

time_calls_indiv$sound.files <- time_calls_indiv$orig.sound.files
ovlp <- overlapping_sels(time_calls_indiv, parallel = 10)

# remove overlapping sounds
time_calls_indiv <- ovlp[is.na(ovlp$ovlp.sels), ]


sim_group_call_list <- lapply(1:1000, function(x) sim_group_call(c(group_sizes,
    rep(7, 6)), seed = x))

# add simulation tag
sim_group_call_list <- lapply(seq_along(sim_group_call_list), function(x) {
    Y <- sim_group_call_list[[x]]
    Y$sim <- x
    Y$sound.files <- paste0(Y$sound.files, "-sim", x)
    Y$selec <- seq_len(nrow(Y))

    return(Y)
})

sim_group_call_df <- do_call(rbind, sim_group_call_list)

ovlps <- overlapping_sels(sim_group_call_df, parallel = 10)

ovlp_count_list <- lapply(unique(ovlps$sound.files), function(x) {
    X <- ovlps[ovlps$sound.files == x, ]
    cnt <- sum(!is.na(X$ovlp.sels))
    out <- data.frame(sound.files = x, group.size = X$group.size[1],
        ovlp.count = cnt)
    return(out)
})


ovlp_count_df <- do.call(rbind, ovlp_count_list)

ovlp_count_df$ovlp.count <- ovlp_count_df$ovlp.count/2

ovlp_count_df$type <- "Simulated"

ovlp_count_df$flight.time <- 120
ovlp_count_df$ovlp.rate <- ovlp_count_df$ovlp.count/ovlp_count_df$fligh.time

saveRDS(ovlp_count_df, "./data/processed/ovlps_simulated_group_call.RDS")
Code
sim_ovlp_count_df <- readRDS("./data/processed/ovlps_simulated_group_call.RDS")

# ggplot with histogram of # of overlapping sounds by group size
ggplot(sim_ovlp_count_df, aes(x = ovlp.rate, fill = factor(group.size))) +
    geom_histogram(position = "dodge") + scale_fill_viridis_d(option = "G",
    end = 0.8) + labs(x = "Number of overlaps", y = "Frequency") +
    theme(legend.position = "none") + facet_wrap(~group.size)


time_calls_groups <- readRDS("./data/processed/time_position_calls_for_group_flights.RDS")

# remove those in which only one individual called
call_id_seq <- read.csv("./data/processed/group_call_individual_id_sequence.csv")

# # keep only 120 s duration flights call_id_seq <-
# call_id_seq[call_id_seq$flight.time == 120,]

call_id_seq <- call_id_seq[order(call_id_seq$event, call_id_seq$start),
    ]

call_id_seq_list <- split(call_id_seq, call_id_seq$event)

unique_callers_list <- pblapply(call_id_seq_list, function(x) {
    x <- x[order(x$start), ]
    id_seq <- x$indiv
    id_seq <- as.character(as.numeric(factor(id_seq)))
    unique.callers <- length(unique(id_seq))

    out <- data.frame(group = x$group[1], unique.callers = unique.callers)
    return(out)
})

unique_callers_df <- do_call(rbind, unique_callers_list)

time_calls_groups <- call_id_seq[!call_id_seq$group %in% unique_callers_df$group[unique_callers_df$unique.callers ==
    1], ]


time_calls_groups$sound.files <- time_calls_groups$event

obs_ovlps <- overlapping_sels(time_calls_groups, parallel = 10)


obs_ovlp_count_list <- lapply(unique(obs_ovlps$sound.files), function(x) {
    X <- obs_ovlps[obs_ovlps$sound.files == x, ]
    cnt <- sum(!is.na(X$ovlp.sels))
    out <- data.frame(sound.files = x, group.size = X$group.size[1],
        type = X$type[1], ovlp.count = cnt, flight.time = X$flight.time[1])
    return(out)
})


obs_ovlp_count_df <- do.call(rbind, obs_ovlp_count_list)

obs_ovlp_count_df$ovlp.count <- obs_ovlp_count_df$ovlp.count/2

obs_ovlp_count_df$ovlp.rate <- obs_ovlp_count_df$ovlp.count/obs_ovlp_count_df$flight.time

# ggplot with histogram of # of overlapping sounds by group size
ggplot(obs_ovlp_count_df, aes(x = ovlp.rate, fill = factor(group.size))) +
    geom_histogram(position = "dodge") + scale_fill_viridis_d(option = "G",
    end = 0.8) + labs(x = "Number of overlaps", y = "Frequency") +
    theme(legend.position = "none") + facet_wrap(~group.size)

sim_ovlp_count_df$simulation <- NULL
total_count_df <- rbind(sim_ovlp_count_df, obs_ovlp_count_df)


total_count_df$type <- gsub("Regular", "Natural", total_count_df$type)

total_count_df$type <- factor(total_count_df$type, levels = c("Natural",
    "Artificial", "Simulated"))

saveRDS(total_count_df, "./data/processed/total_ovlp_count.RDS")
Code
total_count_df <- readRDS("./data/processed/total_ovlp_count.RDS")

# convert to overlaps per minute
total_count_df$ovlp.rate <- total_count_df$ovlp.rate * 60

ggplot(total_count_df, aes(x = ovlp.rate, fill = factor(type))) +
    geom_density(adjust = 3) + scale_fill_viridis_d(option = "G",
    begin = 0.2, end = 0.8, alpha = 0.6) + labs(x = "Number of overlaps",
    y = "Density", fill = "Group type") + facet_wrap(~group.size,
    scales = "free")

Code
agg_cnt <- aggregate(ovlp.rate ~ group.size + type, data = total_count_df,
    FUN = mean)
agg_cnt$sd <- aggregate(ovlp.rate ~ group.size + type, data = total_count_df,
    FUN = sd)$ovlp.rate

# table(call_rate_by_group$group.size, call_rate_by_group$type)
cols <- graph_cols[2:4]

names(cols) <- paste(c("Natural", "Artificial", "Simulated"), "group")

agg_cnt$type <- paste(agg_cnt$type, "group")

agg_cnt$type <- factor(agg_cnt$type, levels = c("Natural group", "Artificial group",
    "Simulated group"))


# convert previous graph in a point sd graph with number of
# overlaps in y and group size as factor in x
gg_overlap <- ggplot(agg_cnt, aes(x = factor(group.size), y = ovlp.rate,
    color = type)) + geom_point(position = position_dodge(width = 0.5),
    alpha = 0.5, size = 3) + geom_errorbar(aes(ymin = ovlp.rate -
    sd, ymax = ovlp.rate + sd), size = 1, width = 0, position = position_dodge(width = 0.5),
    show.legend = FALSE, alpha = 0.5) + scale_color_manual(values = cols) +
    labs(x = "Group size", y = "Overlaps / min", color = "") + theme(legend.position = "inside",
    legend.position.inside = c(0.2, 0.7))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Code
gg_overlap

Code
pg <- plot_grid(gg_rate_indiv_pimp + theme_classic(base_size = 12) +
    theme(legend.position = "none", axis.title.x = element_blank()),
    ggrate_indv_grp_pimp + theme_classic(base_size = 12) + theme(legend.position = "none",
        axis.title.x = element_blank()), gg_overlap + theme_classic(base_size = 12) +
        theme(legend.position = "inside", legend.position.inside = c(0.2,
            0.7)), ncol = 1, labels = "AUTO")
Warning: Removed 7 rows containing missing values or values outside the scale range
(`stat_slabinterval()`).
Warning: Removed 7 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 7 rows containing non-finite outside the scale range
(`stat_half_point()`).
Warning: Removed 7 rows containing non-finite outside the scale range
(`stat_signif()`).
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_half_point()`).
Warning: Removed 26 rows containing missing values or values outside the scale range
(`stat_slabinterval()`).
Warning: Removed 26 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 26 rows containing non-finite outside the scale range
(`stat_half_point()`).
Warning: Removed 26 rows containing non-finite outside the scale range
(`stat_signif()`).
Code
hedFont <- "Arial"
pg <- pg + theme(plot.title = element_text(size = 20, family = hedFont,
    face = "bold"))

pg

Code
# ggsave( './output/vocal_coordination_3_panels.png', pg,
# grDevices::png, width = 6, height = 8, dpi = 300 )
Code
# observed data
obs_ovlp_count_df <- total_count_df[total_count_df$type != "Simulated", ]


sim_ovlp_count_df$simulation <- sapply(strsplit(sim_ovlp_count_df$sound.files, "-"), "[", 3)

# split by simulation
sim_ovlp_count_list <- split(sim_ovlp_count_df, sim_ovlp_count_df$simulation)

set.seed(123)
sim_ovlp_count_list <- sample(sim_ovlp_count_list, 30)

#combined each simulated data with the observed data
sim_call_groups_list <- lapply(sim_ovlp_count_list, function(x){
    x$simulation <- NULL
  out <- rbind(obs_ovlp_count_df, x)
  out$ovlp.count <- out$ovlp.count * 2
  out$group.size.sc <- scale(out$group.size, center = TRUE, scale = FALSE) 
  out$type <- gsub("Regular", "Natural", out$type)
  out$type <- factor(out$type, levels = c("Natural", "Artificial", "Simulated"))
  return(out)
})

group_call_mod <- brm_multiple(
  formula = ovlp.count ~ type + group.size + offset(log(flight.time)),
  iter = iter,
  thin = 1,
  data = sim_call_groups_list,
  family = zero_inflated_poisson(link = "log"),
  silent = 2,
  chains = chains,
  backend = "cmdstanr",
  # only works if cmdstanr package is installed
  threads = threading(2),
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)

custom_ppc(fit = group_call_mod[[1]], group = "type")

group_call_mod <- pblapply(group_call_mod,  function(x) add_criterion(x, criterion = c("loo")))

saveRDS(group_call_mod,
        "./data/processed/simulated_group_call_model.RDS")


null_group_call_mod <- brm_multiple(
  formula = ovlp.count ~ 1 + group.size + offset(log(flight.time)),
  iter = iter,
  thin = 1,
  data = sim_call_groups_list,
  family = zero_inflated_poisson(link = "log"),
  silent = 2,
  chains = chains,
  backend = "cmdstanr",
  # only works if cmdstanr package is installed
  threads = threading(2),
  cores = chains,
  combine = FALSE,
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)


custom_ppc(fit = null_group_call_mod[[1]], group = "type")

null_group_call_mod <- pblapply(null_group_call_mod,  function(x) {
  out <- try(add_criterion(x, criterion = c("loo")), silent = TRUE)
  if (is(out, "try-error")) return(x)
  else return(out)
  })

loo_compare(group_call_mod[[1]], null_group_call_mod[[1]])

saveRDS(null_group_call_mod,
        "./data/processed/simulated_group_call_null_model.RDS")

3.3.1 Results

3.3.1.1 Model performance vs null model

Code
group_call_mod <- readRDS("./data/processed/simulated_group_call_model.RDS")

null_group_call_mod <- readRDS("./data/processed/simulated_group_call_null_model.RDS")

loo_diffs <- lapply(seq_along(group_call_mod), function(x) loo::loo_compare(group_call_mod[[x]],
    null_group_call_mod[[x]]))

loo_diff <- do.call(rbind, loo_diffs)

rows <- rownames(loo_diff)
loo_diff <- as.data.frame(loo_diff)

loo_diff$model <- rows

aggregate(cbind(elpd_diff, se_diff) ~ model, loo_diff, mean)
model elpd_diff se_diff
group_call_mod[[x]] 0.00000 0.00000
null_group_call_mod[[x]] -27.23418 20.58624

3.3.1.2 Model fit

Code
# average model
avrg_call <- paste0("posterior_average(", paste0(paste0("group_call_mod[[",
    1:length(group_call_mod), "]]"), collapse = ", "), ", weights = 'loo')")

average_model_draws <- eval(parse(text = avrg_call))

draw_extended_summary(average_model_draws, highlight = TRUE, fill = fill_color,
    beta.prefix = c("^b_", "^bsp_mo"))
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_Intercept -6.046 -6.747 -5.394 1 14618.493 12215.200
b_typeArtificial -2.763 -4.118 -1.598 1 9360.303 8966.711
b_typeSimulated 0.931 0.546 1.375 1 13855.804 11095.257
b_group.size 0.465 0.359 0.576 1 13451.932 12438.804

3.3.1.3 Contrasts

Code
contrasts <- draws_contrasts(average_model_draws, predictor_name = "type",
    basal_level = "Natural", fill_color = fill_color)

contrasts$contrasts
est l-95% CI u-95% CI
Artificial - Natural 2.789 1.598 4.118
Simulated - Natural -0.938 -1.375 -0.546
Simulated - Artificial -3.727 -5.053 -2.558
Code
contrasts$plot + theme_classic()

3.3.1.4 Posterior predictive checks

Code
custom_ppc(fit = group_call_mod[[1]], group = "type")

Takeaways

  • Simulated groups produce more overlapping calls than both artificial and natural groups
  • Artificial groups produce more overlapping calls than natural groups

3.4 Gap duration

3.4.1 Gap variation of individual in solo flight vs overall group flight

Variation measured as the coefficient of variation of gap duration

Code
gaps_indiv <- readRDS("./data/processed/gaps_by_individual.RDS")

# remove those in which only one individual called
call_id_seq <- read.csv("./data/processed/group_call_individual_id_sequence.csv")

call_id_seq <- call_id_seq[order(call_id_seq$event, call_id_seq$start),
    ]

call_id_seq_list <- split(call_id_seq, call_id_seq$event)

unique_callers_list <- pblapply(call_id_seq_list, function(x) {
    x <- x[order(x$start), ]
    id_seq <- x$indiv
    id_seq <- as.character(as.numeric(factor(id_seq)))
    unique.callers <- length(unique(id_seq))

    out <- data.frame(group = x$group[1], unique.callers = unique.callers)
    return(out)
})

unique_callers_df <- do_call(rbind, unique_callers_list)

gaps_indiv <- gaps_indiv[!gaps_indiv$group %in% unique_callers_df$group[unique_callers_df$unique.callers ==
    1], ]

sub_gaps_indiv <- gaps_indiv[grep("indiv.group", gaps_indiv$experiment.type,
    invert = TRUE), ]
sub_gaps_indiv$experiment.type[sub_gaps_indiv$experiment.type == "mixed-overall.group"] <- "artificial.group"
sub_gaps_indiv$experiment.type[sub_gaps_indiv$experiment.type == "regular-overall.group"] <- "real.group"
sub_gaps_indiv$type <- sub_gaps_indiv$experiment.type

sub_gaps_indiv$type <- factor(sub_gaps_indiv$type, levels = c("solo",
    "real.group", "artificial.group"))

sub_gaps_indiv$group.size.f <- factor(sub_gaps_indiv$group.size, levels = unique(sub_gaps_indiv$group.size),
    ordered = TRUE)

cv_gaps_groups <- aggregate(gaps ~ experiment.type + group.size +
    group, data = sub_gaps_indiv, FUN = cv)
cv_gaps_groups$indiv <- "group"
cv_gaps_groups <- cv_gaps_groups[cv_gaps_groups$experiment.type !=
    "solo", ]

cv_gaps_indiv <- aggregate(gaps ~ indiv + experiment.type + group.size +
    group, data = sub_gaps_indiv, FUN = cv)
cv_gaps_indiv <- cv_gaps_indiv[cv_gaps_indiv$experiment.type == "solo",
    ]


# duplicate individuals in artificial group flights
dup_indivs <- lapply(unique(cv_gaps_groups$group), function(x) {
    ids <- unique(gaps_indiv$indiv[gaps_indiv$group == x])
    ids <- ids[ids != "group"]

    X <- cv_gaps_indiv[cv_gaps_indiv$indiv %in% ids, ]
    X$group <- x
    X$group.size <- gaps_indiv$group.size[gaps_indiv$group == x][1]
    return(X)
})

dup_indivs <- do.call(rbind, dup_indivs)

cv_gaps <- rbind(dup_indivs, cv_gaps_groups)

names(cv_gaps)[ncol(cv_gaps)] <- "cv.gaps"

cv_gaps$group.size.f <- factor(cv_gaps$group.size, levels = unique(cv_gaps$group.size),
    ordered = TRUE)

names(cv_gaps)[2] <- "type"

cv_gaps$type <- factor(cv_gaps$type, levels = c("solo", "real.group",
    "artificial.group"))

3.4.1.1 Plot raw data

All group sizes combined:

Code
custom_labels <- unique(sub_gaps_indiv$group.size.f)
custom_labels <- paste("group size = ", custom_labels)
names(custom_labels) <- unique(sub_gaps_indiv$group.size.f)

# raincloud plot:
ggplot(cv_gaps, 
       aes(y = log(cv.gaps + 1), 
           x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_color_viridis_d(option = "G", end = 0.8) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
      # ylim(c(-0.1, 60)) +
           scale_x_discrete(labels = c(
        "solo" = "Solo",
        "real.group" = "Natural group",
        "artificial.group" = "Artificial group"
      )) +
      theme(legend.position = "none") +
      labs(x = "Flight type", y = "log(coefficient of variation\nof gap duration)")

By group size:

Code
# raincloud plot:
ggplot(cv_gaps, 
       aes(y = log(cv.gaps + 1), 
           x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_color_viridis_d(option = "G", end = 0.8) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
      # ylim(c(-0.1, 60)) +
           scale_x_discrete(labels = c(
        "solo" = "Solo",
        "real.group" = "Natural group",
        "artificial.group" = "Artificial group"
      )) +
      theme(legend.position = "none") +
         labs(x = "Flight type", y = "log(coefficient of variation\nof gap duration)") +
   facet_wrap(
        ~ group.size,
        ncol = 2,
        labeller = labeller(group.size = custom_labels)
      ) 

3.4.1.2 Regression model

\[ \text{log(gap variation + 1)} \sim \text{type} + \text{monotonic(group size)} + (1 \mid \text{group}) \]

Response modeled with an normal distribution.

Code
mod <- brm(formula = log(cv.gaps + 1) ~ type + mo(group.size.f) +
    (1 | group), iter = iter, thin = 1, data = cv_gaps, family = gaussian(),
    silent = 2, chains = chains, cores = chains, file = "./data/processed/cv_gaps_solo_vs_group",
    file_refit = "always")

mod <- add_criterion(mod, criterion = "loo")

null_mod <- brm(formula = log(cv.gaps + 1) ~ 1 + (1 | group), iter = iter,
    thin = 1, data = cv_gaps, family = gaussian(), silent = 2, chains = chains,
    cores = chains, file = "./data/processed/null_cv_gaps_solo_vs_group",
    file_refit = "always")

null_mod <- add_criterion(null_mod, criterion = "loo")

beepr::beep(2)
3.4.1.2.1 Results
3.4.1.2.1.1 Model performance vs null model
Code
mod <- readRDS("./data/processed/cv_gaps_solo_vs_group.rds")
null_mod <- readRDS("./data/processed/null_cv_gaps_solo_vs_group.rds")

loo_compare(mod, null_mod)
         elpd_diff se_diff
mod        0.0       0.0  
null_mod -12.2       4.9  
3.4.1.2.1.2 Model fit
Code
extended_summary(fit = mod, gsub.pattern = "type", gsub.replacement = "solo_vs_",
    remove.intercepts = TRUE, highlight = TRUE, print.name = FALSE,
    fill = fill_color, trace.palette = viridis::mako, beta.prefix = c("^b_",
        "^bsp_mo"))
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 Intercept-student_t(3, 4.2, 2.5) sd-student_t(3, 0, 2.5) sigma-student_t(3, 0, 2.5) simo-dirichlet(1) log(cv.gaps + 1) ~ type + mo(group.size.f) + (1 | group) 10000 4 1 5000 1 (5e-05%) 0 11914.22 11792.06 1479252149
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_solo_vs_real.group 0.340 0.196 0.485 1 24349.40 14520.99
b_solo_vs_artificial.group 0.251 0.115 0.386 1 25461.90 15330.95
bsp_mogroup.size.f 0.006 -0.023 0.034 1.001 11914.22 11792.06

3.4.1.2.2 Contrasts
Code
# contrasts
contrasts(fit = mod, predictor = "type", n.posterior = 2000, level.sep = " VS ",
    html.table = TRUE, plot = TRUE, highlight = TRUE, fill = fill_color)
Contrasts Estimate Est.Error l-95% CI u-95% CI
1 solo VS real.group -0.340 0.074 -0.196 -0.485
2 solo VS artificial.group -0.251 0.069 -0.115 -0.386
3 real.group VS artificial.group 0.089 0.097 -0.102 0.275

3.4.1.2.3 Posterior predictive checks
Code
custom_ppc(fit = mod, group = "experiment.type")

Takeaways
  • Variation in gap duration is larger in both natural and artificial groups compared to solo flights
  • No difference in gap variation between natural and artificial groups

3.4.2 Individual in solo flight vs individuals in group flight

3.4.2.1 Plot raw data

All group sizes combined:

Code
sub_gaps_indiv <- gaps_indiv[grep("overall.group", gaps_indiv$experiment.type, invert = TRUE), ]
sub_gaps_indiv$experiment.type[sub_gaps_indiv$experiment.type == "mixed-indiv.group"] <- "artificial.group"
sub_gaps_indiv$experiment.type[sub_gaps_indiv$experiment.type == "mixed-indiv.group"] <- "artificial.group"
sub_gaps_indiv$experiment.type[sub_gaps_indiv$experiment.type == "regular-indiv.group"] <- "real.group"
sub_gaps_indiv$type <- sub_gaps_indiv$experiment.type

sub_gaps_indiv$type <- factor(sub_gaps_indiv$type, levels = c("solo", "real.group", "artificial.group")) 

sub_gaps_indiv$group.size.f <- factor(sub_gaps_indiv$group.size, levels = unique(sub_gaps_indiv$group.size), ordered = TRUE)

# raincloud plot:
ggplot(sub_gaps_indiv,
       aes(
         y = gaps,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_color_viridis_d(option = "G", end = 0.8) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
      ylim(c(-0.1, 40)) +
      theme(legend.position = "none") +
      labs(x = "Type", y = "Gap duration (s)") +
  scale_x_discrete(labels = c(
        "solo" = "Solo",
        "real.group" = "Natural group",
        "artificial.group" = "Artificial group"
      ))

By group size:

Code
# raincloud plot:
ggplot(sub_gaps_indiv,
       aes(
         y = gaps,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_color_viridis_d(option = "G", end = 0.8) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
      ylim(c(-0.1, 40)) +
      theme(legend.position = "none") +
      labs(x = "Type", y = "Gap duration (s)") +
  scale_x_discrete(labels = c(
        "solo" = "Solo",
        "real.group" = "Natural group",
        "artificial.group" = "Artificial group"
      ))  +  facet_wrap(
        ~ group.size,
        ncol = 2,
        labeller = labeller(group.size = custom_labels)
      ) 

3.4.2.2 Regression model

\[ \text{log(gaps + 1)} \sim \text{type} + \text{monotonic(group size)} + (1 \mid \text{group}) + (1 \mid \text{individual}) \]

Response modeled with an exponential distribution.

Code
mod <- brm(formula = log(gaps + 1) ~ type + mo(group.size.f) + (1 |
    indiv) + (1 | group), iter = iter, thin = 1, data = sub_gaps_indiv,
    family = gaussian(), silent = 2, chains = chains, cores = chains,
    control = list(adapt_delta = 0.99, max_treedepth = 15), file_refit = "always",
    file = "./data/processed/group_gaps_solo_vs_group_group_size_monot")

custom_ppc(mod, group = "type")

mod <- add_criterion(mod, criterion = "loo")


null_mod <- brm(formula = log(gaps + 1) ~ 1 + (1 | indiv) + (1 | group),
    iter = iter, thin = 1, data = sub_gaps_indiv, family = gaussian(),
    silent = 2, chains = chains, cores = chains, control = list(adapt_delta = 0.99,
        max_treedepth = 15), file_refit = "always", file = "./data/processed/null_group_gaps_solo_vs_group_group_size_monot")

null_mod <- add_criterion(null_mod, criterion = "loo")

beepr::beep(2)
3.4.2.2.1 Results
3.4.2.2.2 Model performance vs null model
Code
mod <- readRDS("./data/processed/group_gaps_solo_vs_group_group_size_monot.rds")
null_mod <- readRDS("./data/processed/null_group_gaps_solo_vs_group_group_size_monot.rds")

comp <- loo_compare(mod, null_mod)

print(comp)
         elpd_diff se_diff
mod         0.0       0.0 
null_mod -165.3      24.3 
3.4.2.2.3 Model fit
Code
extended_summary(fit = mod, gsub.pattern = "type", gsub.replacement = "solo_vs_",
    remove.intercepts = TRUE, highlight = TRUE, print.name = FALSE,
    trace.palette = viridis::mako, fill = fill_color, beta.prefix = "^b_|^bsp_")
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 Intercept-student_t(3, 1.5, 2.5) sd-student_t(3, 0, 2.5) sigma-student_t(3, 0, 2.5) simo-dirichlet(1) log(gaps + 1) ~ type + mo(group.size.f) + (1 | indiv) + (1 | group) 10000 4 1 5000 0 (0%) 0 10737.22 13140.68 1431504020
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
b_solo_vs_real.group 0.421 0.368 0.472 1 32664.43 16783.83
b_solo_vs_artificial.group 0.488 0.407 0.569 1 26529.35 15155.39
bsp_mogroup.size.f 0.003 -0.065 0.072 1 10737.22 13140.68

3.4.2.2.4 Contrasts
Code
# contrasts
contrasts(fit = mod, predictor = "type", n.posterior = 2000, level.sep = " VS ",
    html.table = TRUE, plot = TRUE, highlight = TRUE, fill = fill_color)
Contrasts Estimate Est.Error l-95% CI u-95% CI
1 real.group VS solo 0.421 0.027 0.368 0.472
2 real.group VS artificial.group -0.067 0.049 -0.163 0.027
3 solo VS artificial.group -0.488 0.041 -0.407 -0.569

3.4.2.2.5 Posterior predictive checks
Code
custom_ppc(fit = mod, group = "type")

Takeaways

  • Individual gaps are longer in both natural and artificial groups than in solo flights
  • No difference in gap duration between natural and artificial groups
  • Group size does not affects gap duration

3.5 Group calling sequence predictability

  • Measure as the entropy of transition probabilities of the individual ID in a first order markov chain model
Code
call_id_seq <- read.csv("./data/processed/group_call_individual_id_sequence.csv")

call_id_seq <- call_id_seq[order(call_id_seq$event, call_id_seq$start),
    ]

call_id_seq_list <- split(call_id_seq, call_id_seq$event)

# get diversity and entropy
entropy_data_list <- pblapply(call_id_seq_list, function(x) {
    x <- x[order(x$start), ]
    id_seq <- x$indiv
    id_seq <- as.character(as.numeric(factor(id_seq)))
    unique.callers <- length(unique(id_seq))
    id_seq <- paste(id_seq, collapse = "")
    entrp <- entropy(id_seq)
    norm.entrp <- normalized_entropy(id_seq)
    markov.entropy <- markov_entropy(id_seq, normalize = FALSE)
    norm.markov.entropy <- markov_entropy(id_seq, normalize = TRUE)

    out <- data.frame(event = x$event[1], group = x$group[1], group.size = x$group.size[1],
        experiment = x$experiment[1], sequence = id_seq, entropy = entrp,
        normalized.entropy = norm.entrp, unique.callers = unique.callers,
        prop.callers = unique.callers/x$group.size[1], markov.entropy = markov.entropy,
        norm.markov.entropy = norm.markov.entropy)

    return(out)
})

seq_entropy_data <- do.call(rbind, entropy_data_list)
seq_entropy_data$prop.callers[seq_entropy_data$prop.callers >= 1] <- 0.99999

# remove those in which only 1 individual called
seq_entropy_data <- seq_entropy_data[seq_entropy_data$unique.callers >
    1, ]

# cor(seq_entropy_data[, c('entropy', 'normalized.entropy',
# 'unique.callers', 'markov.entropy', 'norm.markov.entropy')],
# use = 'pairwise.complete.obs')

seq_entropy_data$type <- ifelse(seq_entropy_data$experiment == "Regular",
    "Natural", "Artificial")

seq_entropy_data$group.size.f <- factor(seq_entropy_data$group.size,
    levels = unique(seq_entropy_data$group.size), ordered = TRUE)

3.5.1 Plot raw data

All groups combined:

Code
# raincloud plot:
ggplot(seq_entropy_data,
       aes(
         y = norm.markov.entropy,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_color_viridis_d(option = "G", end = 0.8) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
      # ylim(c(-0.1, 4.8)) +
      #      scale_x_discrete(labels = c(
      #   "natural" = "Natural",
      #   "artificial" = "Artificial",
      #   "simulated" = "Simulated"
      # )) +
      theme(legend.position = "none") +
      labs(x = "Flight type", y = "Normalized markov\ntransitions entropy")

By group size:

Code
# raincloud plot:
ggplot(seq_entropy_data[seq_entropy_data$group.size < 6, ],
       aes(
         y = norm.markov.entropy,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_color_viridis_d(option = "G", end = 0.8) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
      # ylim(c(-0.1, 4.8)) +
      #      scale_x_discrete(labels = c(
      #   "natural" = "Natural",
      #   "artificial" = "Artificial",
      #   "simulated" = "Simulated"
      # )) +
      theme(legend.position = "none") +
      facet_wrap(
        ~ group.size,
        ncol = 2
      ) +
      labs(x = "Flight type", y = "Normalized markov\ntransitions entropy")

3.5.2 Regression model

Model:

\[ \text{normalize markov transition entropy} \sim \text{type} \]

Code
mod <- brm(formula = norm.markov.entropy ~ type, iter = iter, thin = 1,
    data = seq_entropy_data, family = zero_one_inflated_beta(), silent = 2,
    chains = chains, cores = chains, control = list(adapt_delta = 0.99,
        max_treedepth = 15), file_refit = "always", file = "./data/processed/entropy_by_experiment_type_group_size_monot")

custom_ppc(mod, group = "type")

mod <- add_criterion(mod, criterion = "loo")

null_mod <- brm(formula = norm.markov.entropy ~ 1, iter = iter, thin = 1,
    data = seq_entropy_data, family = zero_one_inflated_beta(), silent = 2,
    chains = chains, cores = chains, control = list(adapt_delta = 0.99,
        max_treedepth = 15), file_refit = "always", file = "./data/processed/null_model_entropy_by_experiment_type_group_size_monot")

null_mod <- add_criterion(null_mod, criterion = "loo")

3.5.2.1 Results

3.5.2.1.1 Model performance vs null model
Code
mod <- readRDS("./data/processed/entropy_by_experiment_type_group_size_monot.rds")
null_mod <- readRDS("./data/processed/null_model_entropy_by_experiment_type_group_size_monot.rds")

comp <- loo_compare(mod, null_mod)

print(comp)
         elpd_diff se_diff
null_mod  0.0       0.0   
mod      -0.6       0.9   
3.5.2.1.2 Model fit
Code
extended_summary(fit = mod, gsub.pattern = "b_type", gsub.replacement = "Aritificial_vs_",
    remove.intercepts = TRUE, highlight = TRUE, print.name = FALSE,
    trace.palette = viridis::mako, fill = fill_color, beta.prefix = "^b_|^bsp_")
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 coi-beta(1, 1) Intercept-student_t(3, 0, 2.5) phi-gamma(0.01, 0.01) zoi-beta(1, 1) norm.markov.entropy ~ type 10000 4 1 5000 0 (0%) 0 17934.94 13326.27 1241459851
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Aritificial_vs_Natural 0.209 -0.253 0.675 1 17934.94 13326.27

3.5.2.1.3 Posterior predictive checks
Code
custom_ppc(fit = mod, group = "type")

Takeaways

  • No difference in predictability between natural and artificial groups

3.6 Number of calling individuals per experiment

3.6.1 Plot raw data

All groups combined:

Code
# raincloud plot:
ggplot(seq_entropy_data,
       aes(
         y = prop.callers,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_color_viridis_d(option = "G", end = 0.8) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
      # ylim(c(-0.1, 4.8)) +
      #      scale_x_discrete(labels = c(
      #   "natural" = "Natural",
      #   "artificial" = "Artificial",
      #   "simulated" = "Simulated"
      # )) +
      theme(legend.position = "none") +
      labs(x = "Flight type", y = "Proportion of unique callers")
Warning in bandwidth_dpi(): Bandwidth calculation failed.
→ Falling back to `bandwidth_nrd0()`.
ℹ This often occurs when a sample contains many duplicates, which suggests that
  a dotplot (e.g., `geom_dots()`) or histogram (e.g., `density_histogram()`,
  `stat_slab(density = 'histogram')`, or `stat_histinterval()`) may better
  represent the data.
Caused by error in `bw.SJ()`:
! sample is too sparse to find TD

By group size:

Code
# raincloud plot:
ggplot(seq_entropy_data[seq_entropy_data$group.size < 6, ],
       aes(
         y = prop.callers,
         x = type,
         color = type,
         fill = type
       )) +
  # add half-violin from {ggdist} package
  ggdist::stat_halfeye(
    # fill = fill_color,
    alpha = 0.5,
    # custom bandwidth
    adjust = .5,
    # adjust height
    width = .6,
    .width = 0,
    # move geom to the cright
    justification = -.2,
    point_colour = NA
  ) +
  geom_boxplot(# fill = fill_color,
    width = .15,
    # remove outliers
    outlier.shape = NA) +
    # add justified jitter from the {gghalves} package
    gghalves::geom_half_point(
      # color = fill_color,
      # draw jitter on the left
      side = "l",
      # control range of jitter
      range_scale = .4,
      # add some transparency
      alpha = .5,
      transformation = ggplot2::position_jitter(height = 0)
    ) +
      scale_color_viridis_d(option = "G", end = 0.8) +
      scale_fill_viridis_d(option = "G",
                           end = 0.8,
                           alpha = 0.6) +
      # ylim(c(-0.1, 4.8)) +
      #      scale_x_discrete(labels = c(
      #   "natural" = "Natural",
      #   "artificial" = "Artificial",
      #   "simulated" = "Simulated"
      # )) +
      theme(legend.position = "none") +
      facet_wrap(
        ~ group.size,
        ncol = 2
      ) +
      labs(x = "Flight type", y = "Proporiton of unique callers")

3.6.2 Regression model

Code
mod <- brm(formula = prop.callers ~ type + (1 | group.size.f), iter = iter,
    thin = 1, data = seq_entropy_data, family = zero_one_inflated_beta(),
    silent = 2, chains = chains, cores = chains, control = list(adapt_delta = 0.99,
        max_treedepth = 15), file_refit = "always", file = "./data/processed/unique.callers_by_experiment_type_group_size_monot")

custom_ppc(mod, group = "type")

mod <- add_criterion(mod, criterion = "loo")

null_mod <- brm(formula = prop.callers ~ 1 + (1 | group.size.f), iter = iter,
    thin = 1, data = seq_entropy_data, family = zero_one_inflated_beta(),
    silent = 2, chains = chains, cores = chains, control = list(adapt_delta = 0.99,
        max_treedepth = 15), file_refit = "always", file = "./data/processed/null_model_unique.callers_by_experiment_type_group_size_monot")

null_mod <- add_criterion(null_mod, criterion = "loo")

3.6.2.1 Results

3.6.2.1.1 Model performance vs null model
Code
mod <- readRDS("./data/processed/unique.callers_by_experiment_type_group_size_monot.rds")
null_mod <- readRDS("./data/processed/null_model_unique.callers_by_experiment_type_group_size_monot.rds")

comp <- loo_compare(mod, null_mod)

print(comp)
         elpd_diff se_diff
null_mod  0.0       0.0   
mod      -0.4       0.7   
3.6.2.1.2 Model fit
Code
extended_summary(fit = mod, gsub.pattern = "b_type", gsub.replacement = "Aritificial_vs_",
    remove.intercepts = TRUE, highlight = TRUE, print.name = FALSE,
    trace.palette = viridis::mako, fill = fill_color, beta.prefix = "^b_|^bsp_")
priors formula iterations chains thinning warmup diverg_transitions rhats > 1.05 min_bulk_ESS min_tail_ESS seed
1 coi-beta(1, 1) Intercept-student_t(3, 0, 2.5) phi-gamma(0.01, 0.01) sd-student_t(3, 0, 2.5) zoi-beta(1, 1) prop.callers ~ type + (1 | group.size.f) 10000 4 1 5000 4 (2e-04%) 0 18776.57 12800.29 1918215497
Estimate l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Aritificial_vs_Natural -0.282 -0.834 0.283 1 18776.57 12800.29

3.6.2.1.3 Posterior predictive checks
Code
custom_ppc(fit = mod, group = "type")

Takeaways

  • Similar proportion of calling individuals in natural and artificial groups

Session information

R version 4.4.2 (2024-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.5 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Costa_Rica
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] rstan_2.32.6        StanHeaders_2.32.10 extrafont_0.19     
 [4] ggsignif_0.6.4      metap_1.11          multtest_2.60.0    
 [7] Biobase_2.62.0      BiocGenerics_0.48.1 cowplot_1.1.3      
[10] bayestestR_0.14.0   brmsish_1.0.0       tidybayes_3.0.7    
[13] ggridges_0.5.6      kableExtra_1.4.0    brms_2.22.0        
[16] Rcpp_1.0.14         pbapply_1.7-2       readxl_1.4.3       
[19] knitr_1.50          ggplot2_3.5.1       viridis_0.6.5      
[22] viridisLite_0.4.2  

loaded via a namespace (and not attached):
  [1] Rdpack_2.6.1         mnormt_2.1.1         gridExtra_2.3       
  [4] formatR_1.14         inline_0.3.19        remotes_2.5.0       
  [7] sandwich_3.1-0       rlang_1.1.5          magrittr_2.0.3      
 [10] multcomp_1.4-25      qqconf_1.3.2         matrixStats_1.4.1   
 [13] compiler_4.4.2       mgcv_1.9-1           loo_2.8.0           
 [16] reshape2_1.4.4       systemfonts_1.1.0    vctrs_0.6.5         
 [19] stringr_1.5.1        pkgconfig_2.0.3      arrayhelpers_1.1-0  
 [22] crayon_1.5.3         fastmap_1.2.0        backports_1.5.0     
 [25] labeling_0.4.3       cmdstanr_0.8.1.9000  rmarkdown_2.29      
 [28] ps_1.9.0             purrr_1.0.4          xfun_0.52           
 [31] jsonlite_2.0.0       gghalves_0.1.4       parallel_4.4.2      
 [34] R6_2.6.1             stringi_1.8.7        extrafontdb_1.0     
 [37] mutoss_0.1-13        cellranger_1.1.0     numDeriv_2016.8-1.1 
 [40] estimability_1.5.1   zoo_1.8-12           bayesplot_1.11.1    
 [43] Matrix_1.7-0         splines_4.4.2        tidyselect_1.2.1    
 [46] rstudioapi_0.16.0    abind_1.4-8          yaml_2.3.10         
 [49] codetools_0.2-20     curl_6.2.2           processx_3.8.6      
 [52] pkgbuild_1.4.7       plyr_1.8.9           lattice_0.22-6      
 [55] tibble_3.2.1         withr_3.0.2          bridgesampling_1.1-2
 [58] posterior_1.6.0      coda_0.19-4.1        evaluate_1.0.3      
 [61] survival_3.7-0       sketchy_1.0.5        RcppParallel_5.1.9  
 [64] ggdist_3.3.2         xml2_1.3.6           pillar_1.10.1       
 [67] tensorA_0.36.2.1     packrat_0.9.2        checkmate_2.3.2     
 [70] stats4_4.4.2         insight_0.20.2       sn_2.1.1            
 [73] distributional_0.5.0 generics_0.1.3       mathjaxr_1.6-0      
 [76] rstantools_2.4.0     munsell_0.5.1        scales_1.3.0        
 [79] TFisher_0.2.0        xtable_1.8-4         glue_1.8.0          
 [82] emmeans_1.10.3       tools_4.4.2          xaringanExtra_0.8.0 
 [85] mvtnorm_1.3-1        grid_4.4.2           plotrix_3.8-4       
 [88] tidyr_1.3.1          ape_5.8              QuickJSR_1.4.0      
 [91] Rttf2pt1_1.3.12      rbibutils_2.2.16     colorspace_2.1-1    
 [94] nlme_3.1-165         cli_3.6.4            svUnit_1.0.6        
 [97] svglite_2.1.3        Brobdingnag_1.2-9    dplyr_1.1.4         
[100] V8_4.4.2             gtable_0.3.6         digest_0.6.37       
[103] TH.data_1.1-2        farver_2.1.2         htmlwidgets_1.6.4   
[106] htmltools_0.5.8.1    lifecycle_1.0.4      MASS_7.3-61