Can we back-transform response variables to use intuitive units in the results???
Make sure all models run for the same number of iterations and number of chains
Purpose
Generate simulated data
Run statistical analyses
Load packages and configure global settings
Code
# knitr is require for creating html/pdf/word reports formatR is# used for soft-wrapping code# install/ load packagessketchy::load_packages(packages =c("viridis", "ggplot2", "knitr","readxl", "pbapply", "brms", "kableExtra", "ggridges", "tidybayes","brmsish", "bayestestR", "cowplot", bioconductor ="multtest","metap", "ggsignif", "extrafont"))
── R CMD build ─────────────────────────────────────────────────────────────────
* checking for file ‘/tmp/Rtmp24mpXd/file69a28a746b16/DESCRIPTION’ ... OK
* preparing ‘multtest’:
* checking DESCRIPTION meta-information ... OK
* cleaning src
* checking for LF line-endings in source and make files and shell scripts
* checking for empty or unneeded directories
* looking to see if a ‘data/datalist’ file should be added
* building ‘multtest_2.64.0.tar.gz’
Code
source("~/Dropbox/R_package_testing/brmsish/R/helpers.R")source("~/Dropbox/R_package_testing/brmsish/R/draw_extended_summary.R")source("~/Dropbox/R_package_testing/brmsish/R/extended_summary.R")opts_chunk$set(fig.width =12, fig.height =8)theme_set(theme_classic(base_size =24))fill_color <- viridis::mako(10, alpha =0.5)[7]graph_cols <-mako(4, begin =0, end =0.8)graph_cols_transp <-mako(4, begin =0, end =0.8, alpha =0.5)names(graph_cols_transp) <-names(graph_cols) <-c("solo", "natural","artificial", "simulated")chains <-4iter <-10000options(brms.backend ="cmdstanr")
Custom functions
Code
# Coefficient of variationcv <-function(x) sd(x) /mean(x) *100# Function to measure the euclidean distance in 3D space between bats in a group flight trajectorypairwise_dist <-function(X) {# keep only 3D coordinates data xyz <- X[, grep("^x|^y|^z", names(X))]# remove empty columns xyz <- xyz[, sapply(xyz, function(x)sum(is.na(x))) <10]# set to long format lng_xyz <-data.frame(x =stack(xyz[, grep("^x", names(xyz))])[, 1],y =stack(xyz[, grep("^y", names(xyz))])[, 1],z =stack(xyz[, grep("^z", names(xyz))]))names(lng_xyz)[names(lng_xyz) =="z.values"] <-"z" lng_xyz$ind <-gsub("z_", "", lng_xyz$z.ind) lng_xyz$z.ind <-NULL# lng_xyz$frame <- X$frame lng_xyz$frame <-1:nrow(X)# mean distance among all bats obs_dists <-sapply(unique(lng_xyz$frame), function(x)mean(dist(lng_xyz[lng_xyz$frame == x, c("x", "y", "z")]))) out <-data.frame(group = X$group[1],group.size =length(unique(lng_xyz$ind)),distance = obs_dists,type = X$type[1],frame =1:nrow(X) )# add group size as factor out$group.size.factor <-factor(out$group.size, levels =1:10, ordered =TRUE)return(out)}# function to create simulated data getting one individual trajectory from diferent groups for a total of individual equals to group_sizes[x]sim_group_traj <-function(group_sizes, seed, trajectories =FALSE) { trajs <- indiv_traj_list# randomize so each iteration is a differentset.seed(seed) trajs <-sample(trajs) dists_list <-list() group_num <-1 used_trajs <-list()for (e inseq_along(group_sizes)) { i <- group_sizes[e] sample_traj_indx <-which(!duplicated(names(trajs)))[1:i]set.seed(group_num + seed) sample_trajs <- trajs[sample_traj_indx]# force all to have the same number of frames min_frames <-min(sapply(sample_trajs, nrow)) sample_trajs <-lapply(sample_trajs, function(x) x[1:min_frames, ]) sample_traj_df <-do_call(cbind, sample_trajs)names(sample_traj_df) <-paste(c("x", "y", "z"), rep(1:i, 3), sep ="_") sample_traj_df$group <-paste("sim", group_num, sep ="-") sample_traj_df$type <-"simulated" dist_df <-pairwise_dist(sample_traj_df) dist_df$type <-"simulated"# add to output list dists_list[[length(dists_list) +1]] <- dist_df# remove sampled trajectories from trajs based on indices so idnviduals are only used once trajs <- trajs[-sample_traj_indx] group_num <- group_num +1names(sample_trajs) <-paste("sim", e, 1:length(sample_trajs), sep ="-") used_trajs <-c(used_trajs, sample_trajs) } dists_df <-do.call(rbind, dists_list)if (!trajectories)return(dists_df) elsereturn(used_trajs)}# function to simulate group calling from solo calling vectorssim_group_call <-function(group_sizes, seed) { indv_calls <-split(time_calls_indiv, list(time_calls_indiv$year.audio))# randomize so each iteration is a differentset.seed(seed) indv_calls <-sample(indv_calls)# dists_list <- list()# group_num <- 1# used_calls <- list() sim_call_list <-lapply(seq_along(group_sizes), function(i) { e <- group_sizes[i]# set.seed(e + seed) sampled_calls <- indv_calls[sample(1:length(indv_calls), e)] clms <-c("start", "end", "indiv", "year.audio") sampled_calls <-lapply(sampled_calls, function(x) x[, clms]) sample_calls_df <-do_call(rbind, sampled_calls) sample_calls_df$sound.files <-paste(i, e, sep ="-") sample_calls_df$year.audio sample_calls_df$group.size <- ereturn(sample_calls_df) }) sim_call_df <-do.call(rbind, sim_call_list)return(sim_call_df)}# Function to calculate contrasts based on a brms draws object# Function to calculate contrasts based on posterior draws and plot the posterior distributionsdraws_contrasts <-function(draws, predictor_name, basal_level,fill_color ="blue",beta.prefix ="^b_") {# Extract the model coefficients coef_names <-colnames(draws) intercept_name <-grep("^b_Intercept$", coef_names, value =TRUE) group_names <-grep(paste0(beta.prefix, predictor_name), coef_names, value =TRUE)# Determine the levels of the categorical predictor levels <-c(basal_level, sub(paste0(beta.prefix, predictor_name), "", group_names))# Calculate EMMs for each level group_values <-list() group_values[[1]] <- draws[[intercept_name]]for (i inseq_along(group_names)) { group_values[[i +1]] <- draws[[intercept_name]] + draws[[group_names[i]]] }# Compute pairwise contrasts contrast_list <-list() contrast_names <-combn(levels, 2, function(x)paste(x[2], "-", x[1]))for (i in1:length(contrast_names)) { contrast_list[[i]] <- group_values[[which(levels ==strsplit(contrast_names[i], " - ")[[1]][2])]] - group_values[[which(levels ==strsplit(contrast_names[i], " - ")[[1]][1])]] }names(contrast_list) <- contrast_names# Function to summarize contrasts summarize_contrast <-function(contrast) { mean_contrast <-mean(contrast) lower_contrast <-quantile(contrast, 0.025) upper_contrast <-quantile(contrast, 0.975)return(c(mean = mean_contrast,lower = lower_contrast,upper = upper_contrast )) }# Summarize each contrast summary_contrasts <-sapply(contrast_list, summarize_contrast, simplify ="array")# Convert summary to a data frame for plotting contrast_summary_df <-data.frame(Contrast =rep(names(contrast_list), each =3),Statistic =rep(c("Mean", "Lower", "Upper"), times =length(contrast_list)),Value =as.vector(summary_contrasts) )# flip summary summary_contrasts <-t(summary_contrasts)colnames(summary_contrasts) <-c("est", "l-95% CI", "u-95% CI") summary_contrasts <-html_format_coef_table(as.data.frame(summary_contrasts), fill = fill_color, highlight =TRUE)# Plot the posterior distributions of contrasts contrast_df <-do.call(rbind, lapply(names(contrast_list), function(name) {data.frame(Contrast = name, Value = contrast_list[[name]]) })) p <-ggplot(contrast_df, aes(x = Value)) +geom_density(alpha =0.6,fill = fill_color,color = fill_color) +geom_vline(xintercept =0, linetype ="dotted") +facet_wrap(~ Contrast, scales ="free_y", ncol =1) +theme_minimal() +theme(axis.title.y =element_blank(),axis.text.y =element_blank(),axis.ticks.y =element_blank(),plot.title =element_blank() ) results <-list(plot = p,contrasts = summary_contrasts)return(results) }# to create several posterior predictive check plots out of a brms fitcustom_ppc <-function(fit, group =NULL, ndraws =30) { by_group <-if (!is.null(group)){TRUE } elseFALSEif (by_group) by_group <-if (any(names(fit$data) == group)){TRUE } elseFALSEif (by_group) by_group <-if (is.character(fit$data[, group]) |is.factor(fit$data[, group])){TRUE } elseFALSEif (by_group) { ppc_dens <-pp_check(fit,ndraws = ndraws,type ='dens_overlay_grouped',group = group) pp_mean <-pp_check( fit,type ="stat_grouped",stat ="mean",group = group,ndraws = ndraws ) +theme_classic() pp_scat <-pp_check(fit,type ="scatter_avg",# group = group,ndraws = ndraws) } else { ppc_dens <-pp_check(fit,ndraws = ndraws,type ='dens_overlay') pp_mean <-pp_check(fit,type ="stat",stat ="mean",ndraws = ndraws) +theme_classic() pp_scat <-pp_check(fit,type ="scatter_avg",ndraws = ndraws) } pp_stat2 <-pp_check(fit, type ="stat_2d", ndraws = ndraws) pp_plot_list <-list(ppc_dens, pp_mean, pp_scat, pp_stat2) pp_plot_list[c(1, 3:4)] <-lapply(pp_plot_list[c(1, 3:4)], function(x) x +scale_color_viridis_d(begin =0.3,end =0.8,alpha =0.5,option ="mako", ) +scale_fill_viridis_d(begin =0.3,end =0.8,alpha =0.5,option ="mako" ) +theme_classic()) ppc_plot <-plot_grid(plotlist = pp_plot_list, ncol =2)print(ppc_plot)}## caller sequence features# Function to calculate entropyentropy <-function(sequence) { freq <-table(strsplit(sequence, NULL)[[1]]) # Frequency of each letter prob <- freq /sum(freq) # Probability of each letter entropy_value <--sum(prob *log2(prob)) # Shannon entropyreturn(entropy_value)}# Function to calculate normalized entropynormalized_entropy <-function(sequence) { H <-entropy(sequence) unique_elements <-length(unique(strsplit(sequence, NULL)[[1]])) max_entropy <-log2(unique_elements) H_norm <- H / max_entropyreturn(H_norm)}# Function to calculate entropy of a probability distributionentropy2 <-function(probs) { non_zero_probs <- probs[probs >0] # Ignore zero probabilities H <--sum(non_zero_probs *log2(non_zero_probs))return(H)}# Function to create a Markov model and calculate average entropymarkov_entropy <-function(sequence, normalize =FALSE) { states <-strsplit(sequence, NULL)[[1]] transitions <-table(head(states, -1), tail(states, -1)) # Transition counts transition_probs <-prop.table(transitions, 1) # Row-wise probabilities# Calculate entropy for each row (state) in the transition matrix state_entropies <-apply(transition_probs, 1, entropy2)# Average entropy across all states avg_entropy <-mean(state_entropies, na.rm =TRUE)if (normalize) avg_entropy <- avg_entropy /log2(length(unique(states)))return(avg_entropy)}
# get number of individuals per groupn_per_group <-sapply(unique(group_traj$group), USE.NAMES =FALSE,function(x) {# get those for that group X <- group_traj[group_traj$group == x, ]# count number of individuals (No NAs) n_indiv <- (sum(!sapply(X, anyNA)) -3)/3return(n_indiv) })table(n_per_group)
n_per_group
2 3 4 5 6
7 8 7 8 4
1.2 Compute pairwise distances for natural and artificial groups
Code
pairwise_dists_list <-sapply(split(group_traj, group_traj$group), pairwise_dist, simplify =FALSE)pairwise_dists <-do.call(rbind, pairwise_dists_list)# remove those with unlikely distancepairwise_dists <- pairwise_dists[pairwise_dists$distance >0.1& pairwise_dists$distance <7, ]
1.3 Generate simulated group flight trajectories
Code
## split into single individual trajectories split in a listsplit_group_traj <-split(group_traj, group_traj$group)# get single individual trajectoresindiv_traj_list <-lapply(split_group_traj, function(x) { x <- x[, sapply(x, function(x) sum(is.na(x))) <10]# get number of individualssuppressWarnings(indv_num <-unique(na.omit(as.numeric(gsub(".*?([0-9]+).*","\\1", names(x)))))) indiv_trajs <-lapply(indv_num, function(y) x[, grep(y, names(x))])return(indiv_trajs)})indiv_traj_list <-unlist(indiv_traj_list, recursive =FALSE, use.names =TRUE)# remove last character of the list names so all have the# original group namenames(indiv_traj_list) <-gsub(".$", "", names(indiv_traj_list))# mean number of individuals per group size in original datamean_n_indiv <-floor(mean(table(n_per_group)))group_sizes <-as.numeric(names(table(n_per_group)))group_sizes <-rep(group_sizes, each = mean_n_indiv)sim_dists <-lapply(1:30, function(x) sim_group_traj(group_sizes,seed = x))sim_trajs_list <-lapply(1:30, function(x) sim_group_traj(group_sizes,seed = x, trajectories =TRUE))# remove those with unlikely distancesim_dists <-lapply(sim_dists, function(x) { x <- x[x$distance >0.1& x$distance <7.81, ]return(x)})
1.4 Describe data
Mean distance between individuals in a group flight:
Code
sim_dists_df <-do.call(rbind, sim_dists)# mean distance by group typeagg <-aggregate(distance ~ type, sim_dists_df, mean)agg$sd <-aggregate(distance ~ type, sim_dists_df, sd)$distance# mean distance by group typeagg2 <-aggregate(distance ~ type, pairwise_dists, mean)agg2$sd <-aggregate(distance ~ type, pairwise_dists, sd)$distancerbind(agg, agg2)
type
distance
sd
simulated
2.050082
0.5696759
artificial
2.005765
0.6803455
natural
1.670052
0.7256127
Closest distance between individuals in a group flight:
Code
sim_dists_df <-do.call(rbind, sim_dists)# mean distance by group typeagg_min <-aggregate(distance ~ type + group, sim_dists_df, min)agg <-aggregate(distance ~ type, agg_min, mean)agg$sd <-aggregate(distance ~ type, agg_min, sd)$distance# mean distance by group typeagg_min2 <-aggregate(distance ~ type + group, pairwise_dists, min)agg2 <-aggregate(distance ~ type, agg_min2, mean)agg2$sd <-aggregate(distance ~ type, agg_min2, sd)$distancerbind(agg, agg2)
type
distance
sd
simulated
0.6592624
0.3312211
artificial
0.9213170
0.5873041
natural
0.7190676
0.3595199
1.5 Plot raw data
1.5.1 Reduce dimensionality
1.6 Generate simulated group flight trajectories
Code
## split into single individual trajectories# split in a listsplit_group_traj <-split(group_traj, group_traj$group)# pca funfun1 <-function(x) prcomp(x, center =FALSE, scale. =FALSE)$x[,1]# horizontal plane fun# fun1 <- function(x) x[,3]indiv_traj_pc1 <-lapply(indiv_traj_list, fun1)pca_list <-lapply(unique(names(indiv_traj_pc1)), function(x){ pca_df <-data.frame(indiv_traj_pc1[names(indiv_traj_pc1) == x])names(pca_df) <-paste0("ind", 1:ncol(pca_df)) stacked_pca_df <-stack(pca_df) stacked_pca_df$frame <-1:nrow(pca_df) stacked_pca_df$group <- x stacked_pca_df$group.size <-ncol(pca_df)return(stacked_pca_df)})pca_df <-do.call(rbind, pca_list)# for simulated datasim_traj_pc1 <-lapply(sim_trajs_list[[1]], function(x) x[,3])# pca_sim_list <- lapply(unique, function(x){pca_sim_list <-lapply(paste("sim", 1:30, "", sep ="-"), function(x){ pca_df <-data.frame(sim_traj_pc1[grep(pattern = x, names(sim_traj_pc1))])names(pca_df) <-paste0("ind", 1:ncol(pca_df)) stacked_pca_df <-stack(pca_df) stacked_pca_df$frame <-1:nrow(pca_df) stacked_pca_df$group <- x stacked_pca_df$group.size <-ncol(pca_df)return(stacked_pca_df)})pca_sim_df <-do.call(rbind, pca_sim_list)pca_sim_df$type <-"simulated"pca_df$type <-sapply(pca_df$group, function(x) pairwise_dists$type[group_traj$group == x][1])#bindpca_df <-rbind(pca_df, pca_sim_df)# order groups by typepca_df$group <-factor(pca_df$group, levels =unique(pca_df$group[order(pca_df$type)]))pca_df <- pca_df[complete.cases(pca_df), ]pca_df$type <-factor(pca_df$type, levels =c("natural", "artificial", "simulated"))# table(pca_df$type, pca_df$group.size)#get 3 names of group for each type levelset.seed(1)selected_groups <-lapply(unique(pca_df$type), function(x) {unique(pca_df$group[pca_df$type == x & pca_df$group.size ==4])[1:3]})pca_df <- pca_df[pca_df$frame <26, ]ggplot(pca_df[pca_df$group %in%unlist(selected_groups), ], aes(x = frame, y = values, group = ind, color = type)) +# geom_point() + geom_smooth(se =FALSE, span =0.3) +theme_classic() +# theme(legend.position = "none") + labs(x ="Frame", y ="Horizontal plane\nposition (m)") +# ylim(c = c(-3, 3)) +facet_wrap( dir ="v", type~ group, #scales = "free_y",ncol =3) +theme(strip.background =element_blank(),strip.text.x =element_blank()) +scale_color_viridis_d(option ="G",end =0.8,alpha =0.6)
1.6.1 Raw distances
All group sizes combined:
Code
# add original data to simulated data and calculate proximitynat_sim_dist_list <-lapply(sim_dists, function(x) {return(rbind(x, pairwise_dists))})# remove distances above 10 mnat_sim_dist_list <-lapply(nat_sim_dist_list, function(x) { x <- x[x$distance <10, ] x$type <-factor(x$type, levels =c("natural", "artificial", "simulated"))return(x)})dat_all <- dat <- nat_sim_dist_list[[1]]dat_all$group.size <-"All sizes"dat <-rbind(dat, dat_all)dat$group.size.factor <-factor(dat$group.size, levels =c(1:10, "All sizes"))n_grps <-aggregate(group ~ group.size.factor + type, dat, function(x)length(unique(x)))n_grps$frames <-aggregate(group ~ group.size.factor + type, dat, function(x)length(x))$groupn_grps$indivs <- n_grps$group *as.numeric(as.character(n_grps$group.size.factor))n_grps$group.size.factor <-factor(n_grps$group.size.factor)n_grps$distance <-ifelse(n_grps$group.size.factor %in%2:3,-1.2,-0.9)n_grps$distance[1] <--0.8n_grps$indivs[n_grps$group.size.factor =="All sizes"] <-aggregate(indivs ~ type, n_grps, sum)$indivsn_grps$n.labels <-paste0(n_grps$group, " / ", n_grps$indivs, " / ", n_grps$frames)n_grps$n.labels[1] <-" n = 3 groups /\n 6 individuals /\n 149 frames"n_grps$n.labels[6] <-" n = 18 groups /\n 74 individuals /\n 888 frames"n_grps$distance[6] <--0.3n_grps$distance[c(11, 17)] <--0.4custom_labels <-unique(nat_sim_dist_list[[1]]$group.size)custom_labels <-paste(c("Group size = ", rep("", 5)), custom_labels)names(custom_labels) <-unique(nat_sim_dist_list[[1]]$group.size)custom_labels <-unique(dat$group.size)custom_labels <-paste(c("Group size = ", rep("", 5)), custom_labels)names(custom_labels) <-unique(dat$group.size)# mean distance by group typeagg_gp <-aggregate(distance ~ type + group + group.size.factor, dat, mean)agg_gp$group.size.factor <-factor(agg_gp$group.size.factor)# raincloud plot:gg_all <-ggplot(dat[dat$group.size =="All sizes", ],aes(y = distance,x = type,color = type,fill = type )) +geom_signif(y_position =c(4.35, 4.75),xmin =c(1, 1),xmax =c(2, 3),annotation =c('beta:0.32~ (0.19 - 0.45)', 'beta:0.36~ (0.24 - 0.47)'), parse =TRUE,tip_length =0.02,textsize =3,size =0.5,color ="gray60",vjust =0.2 ) +# add half-violin from {ggdist} package ggdist::stat_halfeye(# fill = fill_color,alpha =0.5,# custom bandwidthadjust = .5,# adjust heightwidth = .6,.width =0,# move geom to the crightjustification =-.2,point_colour =NA ) +geom_boxplot(# fill = fill_color,width = .15,# remove outliersoutlier.shape =NA) +geom_boxplot(fill =adjustcolor("orange", alpha.f =0.1),data = agg_gp[!agg_gp$group.size %in%2:5,],aes(y = distance, x = type),color =adjustcolor("orange", alpha.f =0.6),width = .1,# remove outliersoutlier.shape =NA ) +# add justified jitter from the {gghalves} package gghalves::geom_half_point(# color = fill_color,# draw jitter on the leftside ="l",# control range of jitterrange_scale = .4,# add some transparencyalpha = .5,transformation = ggplot2::position_jitter(height =0) ) +# scale_color_viridis_d(option = "G", end = 0.8) +# scale_fill_viridis_d(option = "G",# end = 0.8,# alpha = 0.6) +scale_color_manual(values = graph_cols[2:4]) +scale_fill_manual(values = graph_cols_transp[2:4]) + gghalves::geom_half_point(data = agg_gp[agg_gp$group.size !="All sizes", ],aes(y = distance, x = type),color ="orange",# draw jitter on the leftside ="l",pch =20,size =4,# control range of jitterrange_scale = .4,# add some transparencyalpha = .5,transformation = ggplot2::position_jitter(height =0) ) +ylim(c(-0.4, 4.8)) +scale_x_discrete(labels =c("natural"="Natural","artificial"="Artificial","simulated"="Simulated" )) +theme(legend.position ="none") +labs(x ="Flight type", y ="Distance (m)") +facet_wrap(~ group.size,ncol =1,labeller =labeller(group.size =c("All sizes"="All group sizes"))) +geom_text(lineheight =0.8,data = n_grps[!n_grps$group.size.factor %in%2:6, ],aes(y = distance, x = type, label = n.labels),color ="gray60",nudge_x =0,size =3 )# add significancegg_all
Code
# raincloud plot:gg_gs <-ggplot(dat[dat$group.size %in%2:5,],aes(y = distance,x = type,color = type,fill = type )) +# add half-violin from {ggdist} package ggdist::stat_halfeye(# fill = fill_color,alpha =0.6,# custom bandwidthadjust = .5,# adjust heightwidth = .6,.width =0,# move geom to the crightjustification =-.2,point_colour =NA ) +# add justified jitter from the {gghalves} package gghalves::geom_half_point(# color = fill_color,# draw jitter on the leftside ="l",# control range of jitterrange_scale = .4,# add some transparencyalpha = .3,transformation = ggplot2::position_jitter(height =0) ) +# scale_color_viridis_d(option = "G", end = 0.8) +# scale_fill_viridis_d(option = "G",# end = 0.8,# alpha = 0.6) +scale_color_manual(values = graph_cols[2:4]) +scale_fill_manual(values = graph_cols_transp[2:4]) + gghalves::geom_half_point(data = agg_gp[agg_gp$group.size %in%2:5, ],aes(y = distance, x = type),color ="orange",# draw jitter on the leftside ="l",pch =20,size =4,# control range of jitterrange_scale = .4,# add some transparencyalpha = .5,transformation = ggplot2::position_jitter(height =0) ) +geom_boxplot(# fill = fill_color,width = .15,# remove outliersoutlier.shape =NA) +geom_boxplot(fill =adjustcolor("orange", alpha.f =0.1),data = agg_gp[agg_gp$group.size %in%2:5, ],aes(y = distance, x = type),color =adjustcolor("orange", alpha.f =0.6),width = .1,# remove outliersoutlier.shape =NA ) +ylim(c(-1.2, 4.8)) +scale_x_discrete(labels =c("natural"="Natural","artificial"="Artificial","simulated"="Simulated" )) +theme(legend.position ="none") +facet_wrap(~ group.size.factor,ncol =2,labeller =labeller(group.size.factor = custom_labels) ) +labs(x ="Flight type", y ="Pairwise flight distance (m)") +geom_text(lineheight =0.8,data = n_grps[n_grps$group.size.factor %in%2:5, ],aes(y = distance, x = type, label = n.labels),color ="gray60",nudge_x =0,size =2.3 )# gg_gspg <-plot_grid( gg_gs +theme_classic(base_size =15) +theme(legend.position ="none",axis.text.x =element_text(angle =45, hjust =1),axis.title.x =element_blank() ), gg_all +theme_classic(base_size =15) +theme(legend.position ="none", axis.title.y =element_blank()) +labs(x =""),ncol =2 )# Add a common x-axis labelpg <-ggdraw(pg) +draw_label("Group type",x =0.51,y =0.02,vjust =0,size =15 )hedFont <-"Arial"pg <- pg +theme(plot.title =element_text(size =20, family = hedFont, face ="bold"))pg
Add the group roost entry data to each simulated set and run a bayesian mixed effects model coordination on roost entry for each data set
The mean pairwise spatial distance between individuals at each frame was used as the response variable, while group type (natural, artificial or simulated) was used as predictor
Group ID and group size were included as varying effects.
The posterior distributions from the 30 models were combined into a single model fit for statistical inference
Calculate time lapse between the first individual to enter the roost for each group:
Code
group_dat <- dat[!is.infinite(dat$entry.time) &!is.na(dat$entry.time) & dat$type !="Individual", ]# get time difference between entriesgroup_dat_l <-lapply(unique(group_dat$group), function(x) {# print(x) X <- group_dat[group_dat$group == x, ] X <- X[!is.na(X$entry.time), ] X <- X[order(X$entry.time), ]# X$entry.time.diff <- X$entry.time - min(X$entry.time) X$entry.time.diff <-c(NA, X$entry.time[-1] - X$entry.time[-nrow(X)]) X <- X[!is.na(X$entry.time.diff), ]# add 1 milisecond to 0 X$entry.time.diff[X$entry.time.diff ==0] <-NA X$group.size <-if (nrow(X) >0)nrow(X) -1elsevector()return(X)})group_dat <-do.call(rbind, group_dat_l)
2.3 Simulating entry time lapse data
Simulated 30 data sets, each one with a number of simulated groups for each group size (2 to 5 individuals) similar to those in the natural group data. This approach allowed us to propagate uncertainty from random group simulations into our statistical inference.
Code
group_dat <- dat[!is.infinite(dat$entry.time) &!is.na(dat$entry.time) & dat$type !="Individual", ]# get difference to first entrygroup_dat_l <-lapply(unique(group_dat$group), function(x) {# print(x) X <- group_dat[group_dat$group == x, ] X <- X[!is.na(X$entry.time), ] X <- X[order(X$entry.time), ]# X$entry.time.diff <- X$entry.time - min(X$entry.time) X$entry.time.diff <-c(NA, X$entry.time[-1] - X$entry.time[-nrow(X)]) X <- X[!is.na(X$entry.time.diff), ] X$group.size <-if (nrow(X) >0)nrow(X) +1elsevector()return(X)})group_dat <-do.call(rbind, group_dat_l)group_dat$type <-factor(group_dat$type, levels =c("Artificial","Natural", "Simulated"))# make random groups from individual flightsindiv_dat <- dat[!is.infinite(dat$entry.time) &!is.na(dat$entry.time) & dat$type =="Individual", ]indivs <-unique(indiv_dat$Individuo)group_sizes <- group_dat$group.size[!duplicated(group_dat$Video)]# use only group sizes 2:5group_sizes <- group_sizes[group_sizes <=5]table(group_sizes)
# custom_labels <- unique(sim_groups_l[[3]]$group.size)# custom_labels <- paste("group size = ", custom_labels)# names(custom_labels) <- unique(sim_groups_l[[3]]$group.size)# table(call_rate_by_group$group.size, call_rate_by_group$type)cols <- graph_cols[2:4]cols_transp <- graph_cols_transp[2:4]names(cols) <-names(cols_transp) <-c("Natural", "Artificial", "Simulated")# raincloud plot:gg_entry <-ggplot(sim_groups_l[[1]],aes(y = entry.time.diff,x = type,color = type,fill = type )) +# add half-violin from {ggdist} package ggdist::stat_halfeye(# fill = fill_color,alpha =0.5,# custom bandwidthadjust = .5,# adjust heightwidth = .6,.width =0,# move geom to the crightjustification =-.2,point_colour =NA ) +geom_boxplot(# fill = fill_color,width = .15,# remove outliersoutlier.shape =NA) +# add justified jitter from the {gghalves} package gghalves::geom_half_point(# color = fill_color,# draw jitter on the leftside ="l",# control range of jitterrange_scale = .4,# add some transparencyalpha = .5,transformation = ggplot2::position_jitter(height =0) ) +scale_color_manual(values = cols) +scale_fill_manual(values = cols_transp) +# scale_color_viridis_d(option = "G", end = 0.8) +# scale_fill_viridis_d(option = "G",# end = 0.8,# alpha = 0.6) +# ylim(c(-0.1, 4.8)) +scale_x_discrete(labels =c("natural"="Natural","artificial"="Artificial","simulated"="Simulated" )) +theme(legend.position ="none") +labs(x ="Group type", y ="Time difference (s)")agg_grp_time <-aggregate(entry.time.diff ~ type + group, sim_groups_l[[1]], mean)n_grps <-aggregate(group ~ type + group.size, sim_groups_l[[1]], function(x)length(unique(x)))n_grps$indivs <- n_grps$group * n_grps$group.sizeagg_grps <-aggregate(group ~ type, n_grps, sum)agg_grps$indivs <-aggregate(indivs ~ type, n_grps, sum)$indivsagg_grps$n.labels <-paste0(agg_grps$group, " / ", agg_grps$indivs)agg_grps$n.labels[1] <-" n = 13 groups /\n 50 individuals"agg_grps$distance <--15gg_entry <- gg_entry +ylim(c(-20, 210)) +geom_signif(y_position =c(170, 190, 210),xmin =c(1, 1, 2),xmax =c(3, 2, 3),annotation =c('beta:-1.71~ (-2.31 - -1.103)', 'beta:-0.99~ (-1.59 - -0.40)', 'beta:-0.72 ~ (-1.30 - -0.12)'), parse =TRUE,tip_length =0.02,textsize =3,size =0.5,color ="gray60",vjust =0.2 ) + gghalves::geom_half_point(data = agg_grp_time,aes(y = entry.time.diff, x = type),color ="orange",# draw jitter on the leftside ="l",pch =20,size =4,# control range of jitterrange_scale = .4,# add some transparencyalpha = .5,transformation = ggplot2::position_jitter(height =0) ) +geom_boxplot(fill =adjustcolor("orange", alpha.f =0.1),data = agg_grp_time,aes(y = entry.time.diff, x = type),color =adjustcolor("orange", alpha.f =0.6),width = .1,# remove outliersoutlier.shape =NA ) +geom_text(lineheight =0.8,data = agg_grps,aes(y = distance, x = type, label = n.labels),color ="gray60",nudge_x =0,size =4 ) +theme_classic(base_size =12) +theme(legend.position ="none")ggsave("./output/roost_entry_coordination_1_panel.png", gg_entry, grDevices::png,width =4.5,height =3.5,dpi =300)
By group size:
Code
# raincloud plot:ggplot(sim_groups_l[[1]],aes(y = entry.time.diff,x = type,color = type,fill = type )) +# add half-violin from {ggdist} package ggdist::stat_halfeye(# fill = fill_color,alpha =0.5,# custom bandwidthadjust = .5,# adjust heightwidth = .6,.width =0,# move geom to the crightjustification =-.2,point_colour =NA ) +geom_boxplot(# fill = fill_color,width = .15,# remove outliersoutlier.shape =NA) +# add justified jitter from the {gghalves} package gghalves::geom_half_point(# color = fill_color,# draw jitter on the leftside ="l",# control range of jitterrange_scale = .4,# add some transparencyalpha = .5,transformation = ggplot2::position_jitter(height =0) ) +scale_color_viridis_d(option ="G", end =0.8) +scale_fill_viridis_d(option ="G",end =0.8,alpha =0.6) +# ylim(c(-0.1, 4.8)) +scale_x_discrete(labels =c("natural"="Natural","artificial"="Artificial","simulated"="Simulated" )) +theme(legend.position ="none") +facet_wrap(~ group.size.factor,ncol =2,labeller =labeller(group.size.factor = custom_labels) ) +labs(x ="Flight type", y ="Time difference (s)")
2.5 Regression model
Add the group roost entry data to each simulated set and run a bayesian mixed effects model coordination on roost entry for each data set
The difference in time between when entering the roost between consecutive individuals was used as the response variable (modeled with a normal distribution), while group type (natural, artificial or simulated) was used as predictor
Group ID was included as varying effect
The posterior distributions from the 30 models were combined into a single model fit for statistical inference
mod <-brm(formula = calls |resp_rate(flight.time) ~ type +mo(group.size.f) + (1| indiv), iter = iter, thin =1, data = call_rate_indiv, family =negbinomial(),silent =2, chains = chains, cores = chains, control =list(adapt_delta =0.99,max_treedepth =15), file ="./data/processed/individual_call_rate_solo_vs_group_group_size_monot_negbinomial",file_refit ="always")mod <-add_criterion(mod, criterion ="loo")null_mod <-brm(formula = calls |resp_rate(flight.time) ~1+ (1| indiv), iter = iter, thin =1, data = call_rate_indiv, family =negbinomial(),silent =2, chains = chains, cores = chains, control =list(adapt_delta =0.99,max_treedepth =15), file ="./data/processed/null_model_individual_call_rate_solo_vs_group_group_size_monot_negbinomial",file_refit ="always")null_mod <-add_criterion(null_mod, criterion ="loo")
3.2.2.2.1 Results
3.2.2.2.1.1 Model performance vs null model
Code
mod <-readRDS("./data/processed/individual_call_rate_solo_vs_group_group_size_monot.rds")null_mod <-readRDS("./data/processed/null_model_individual_call_rate_solo_vs_group_group_size_monot.rds")loo_compare(mod, null_mod)
mod <-brm(formula =log(cv.gaps +1) ~ type +mo(group.size.f) + (1| group), iter = iter, thin =1, data = cv_gaps, family =gaussian(),silent =2, chains = chains, cores = chains, file ="./data/processed/cv_gaps_solo_vs_group",file_refit ="always")mod <-add_criterion(mod, criterion ="loo")null_mod <-brm(formula =log(cv.gaps +1) ~1+ (1| group), iter = iter,thin =1, data = cv_gaps, family =gaussian(), silent =2, chains = chains,cores = chains, file ="./data/processed/null_cv_gaps_solo_vs_group",file_refit ="always")null_mod <-add_criterion(null_mod, criterion ="loo")beepr::beep(2)
3.4.1.2.1 Results
3.4.1.2.1.1 Model performance vs null model
Code
mod <-readRDS("./data/processed/cv_gaps_solo_vs_group.rds")null_mod <-readRDS("./data/processed/null_cv_gaps_solo_vs_group.rds")loo_compare(mod, null_mod)
Response modeled with an exponential distribution.
Code
mod <-brm(formula =log(gaps +1) ~ type +mo(group.size.f) + (1| indiv) + (1| group), iter = iter, thin =1, data = sub_gaps_indiv,family =gaussian(), silent =2, chains = chains, cores = chains,control =list(adapt_delta =0.99, max_treedepth =15), file_refit ="always",file ="./data/processed/group_gaps_solo_vs_group_group_size_monot")custom_ppc(mod, group ="type")mod <-add_criterion(mod, criterion ="loo")null_mod <-brm(formula =log(gaps +1) ~1+ (1| indiv) + (1| group),iter = iter, thin =1, data = sub_gaps_indiv, family =gaussian(),silent =2, chains = chains, cores = chains, control =list(adapt_delta =0.99,max_treedepth =15), file_refit ="always", file ="./data/processed/null_group_gaps_solo_vs_group_group_size_monot")null_mod <-add_criterion(null_mod, criterion ="loo")beepr::beep(2)
3.4.2.2.1 Results
3.4.2.2.2 Model performance vs null model
Code
mod <-readRDS("./data/processed/group_gaps_solo_vs_group_group_size_monot.rds")null_mod <-readRDS("./data/processed/null_group_gaps_solo_vs_group_group_size_monot.rds")comp <-loo_compare(mod, null_mod)print(comp)
elpd_diff se_diff
mod 0.0 0.0
null_mod -165.3 24.3
mod <-brm(formula = norm.markov.entropy ~ type, iter = iter, thin =1,data = seq_entropy_data, family =zero_one_inflated_beta(), silent =2,chains = chains, cores = chains, control =list(adapt_delta =0.99,max_treedepth =15), file_refit ="always", file ="./data/processed/entropy_by_experiment_type_group_size_monot")custom_ppc(mod, group ="type")mod <-add_criterion(mod, criterion ="loo")null_mod <-brm(formula = norm.markov.entropy ~1, iter = iter, thin =1,data = seq_entropy_data, family =zero_one_inflated_beta(), silent =2,chains = chains, cores = chains, control =list(adapt_delta =0.99,max_treedepth =15), file_refit ="always", file ="./data/processed/null_model_entropy_by_experiment_type_group_size_monot")null_mod <-add_criterion(null_mod, criterion ="loo")
3.5.2.1 Results
3.5.2.1.1 Model performance vs null model
Code
mod <-readRDS("./data/processed/entropy_by_experiment_type_group_size_monot.rds")null_mod <-readRDS("./data/processed/null_model_entropy_by_experiment_type_group_size_monot.rds")comp <-loo_compare(mod, null_mod)print(comp)
No difference in predictability between natural and artificial groups
3.6 Number of calling individuals per experiment
3.6.1 Plot raw data
All groups combined:
Code
# raincloud plot:ggplot(seq_entropy_data,aes(y = prop.callers,x = type,color = type,fill = type )) +# add half-violin from {ggdist} package ggdist::stat_halfeye(# fill = fill_color,alpha =0.5,# custom bandwidthadjust = .5,# adjust heightwidth = .6,.width =0,# move geom to the crightjustification =-.2,point_colour =NA ) +geom_boxplot(# fill = fill_color,width = .15,# remove outliersoutlier.shape =NA) +# add justified jitter from the {gghalves} package gghalves::geom_half_point(# color = fill_color,# draw jitter on the leftside ="l",# control range of jitterrange_scale = .4,# add some transparencyalpha = .5,transformation = ggplot2::position_jitter(height =0) ) +scale_color_viridis_d(option ="G", end =0.8) +scale_fill_viridis_d(option ="G",end =0.8,alpha =0.6) +# ylim(c(-0.1, 4.8)) +# scale_x_discrete(labels = c(# "natural" = "Natural",# "artificial" = "Artificial",# "simulated" = "Simulated"# )) +theme(legend.position ="none") +labs(x ="Flight type", y ="Proportion of unique callers")
Warning in bandwidth_dpi(): Bandwidth calculation failed.
→ Falling back to `bandwidth_nrd0()`.
ℹ This often occurs when a sample contains many duplicates, which suggests that
a dotplot (e.g., `geom_dots()`) or histogram (e.g., `density_histogram()`,
`stat_slab(density = 'histogram')`, or `stat_histinterval()`) may better
represent the data.
Caused by error in `bw.SJ()`:
! sample is too sparse to find TD
By group size:
Code
# raincloud plot:ggplot(seq_entropy_data[seq_entropy_data$group.size <6, ],aes(y = prop.callers,x = type,color = type,fill = type )) +# add half-violin from {ggdist} package ggdist::stat_halfeye(# fill = fill_color,alpha =0.5,# custom bandwidthadjust = .5,# adjust heightwidth = .6,.width =0,# move geom to the crightjustification =-.2,point_colour =NA ) +geom_boxplot(# fill = fill_color,width = .15,# remove outliersoutlier.shape =NA) +# add justified jitter from the {gghalves} package gghalves::geom_half_point(# color = fill_color,# draw jitter on the leftside ="l",# control range of jitterrange_scale = .4,# add some transparencyalpha = .5,transformation = ggplot2::position_jitter(height =0) ) +scale_color_viridis_d(option ="G", end =0.8) +scale_fill_viridis_d(option ="G",end =0.8,alpha =0.6) +# ylim(c(-0.1, 4.8)) +# scale_x_discrete(labels = c(# "natural" = "Natural",# "artificial" = "Artificial",# "simulated" = "Simulated"# )) +theme(legend.position ="none") +facet_wrap(~ group.size,ncol =2 ) +labs(x ="Flight type", y ="Proporiton of unique callers")
3.6.2 Regression model
Code
mod <-brm(formula = prop.callers ~ type + (1| group.size.f), iter = iter,thin =1, data = seq_entropy_data, family =zero_one_inflated_beta(),silent =2, chains = chains, cores = chains, control =list(adapt_delta =0.99,max_treedepth =15), file_refit ="always", file ="./data/processed/unique.callers_by_experiment_type_group_size_monot")custom_ppc(mod, group ="type")mod <-add_criterion(mod, criterion ="loo")null_mod <-brm(formula = prop.callers ~1+ (1| group.size.f), iter = iter,thin =1, data = seq_entropy_data, family =zero_one_inflated_beta(),silent =2, chains = chains, cores = chains, control =list(adapt_delta =0.99,max_treedepth =15), file_refit ="always", file ="./data/processed/null_model_unique.callers_by_experiment_type_group_size_monot")null_mod <-add_criterion(null_mod, criterion ="loo")
3.6.2.1 Results
3.6.2.1.1 Model performance vs null model
Code
mod <-readRDS("./data/processed/unique.callers_by_experiment_type_group_size_monot.rds")null_mod <-readRDS("./data/processed/null_model_unique.callers_by_experiment_type_group_size_monot.rds")comp <-loo_compare(mod, null_mod)print(comp)