Code
# options to customize chunk outputs
knitr::opts_chunk$set(
tidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE
)feeding behavior covariates in harvestmen
# options to customize chunk outputs
knitr::opts_chunk$set(
tidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE
)# knitr is require for creating html/pdf/word reports
# formatR is used for soft-wrapping code
# install/ load packages
sketchy::load_packages(
packages = c(
"knitr",
"formatR",
"brms",
"readxl",
"ggplot2",
"brmsish",
"viridis",
"marginaleffects",
"loo",
"posterior"
)
)Warning: replacing previous import 'brms::rstudent_t' by 'ggdist::rstudent_t'
when loading 'brmsish'
Warning: replacing previous import 'brms::dstudent_t' by 'ggdist::dstudent_t'
when loading 'brmsish'
Warning: replacing previous import 'brms::qstudent_t' by 'ggdist::qstudent_t'
when loading 'brmsish'
Warning: replacing previous import 'brms::pstudent_t' by 'ggdist::pstudent_t'
when loading 'brmsish'
theme_paper <- function() {
list(
theme_classic(base_size = 14),
scale_color_viridis_d(
option = "G",
end = 0.9
),
scale_fill_viridis_d(
option = "G",
end = 0.9
)
)
}
plot_model_average_forest <- function(
fits,
loos,
delta = 2,
weighting = c("stacking", "pseudobma")) {
weighting <- match.arg(weighting)
library(brms)
library(loo)
library(posterior)
library(ggplot2)
## select top models
loo_tab <- loo_compare(loos)
keep <- rownames(loo_tab)[loo_tab[, "elpd_diff"] >= -delta]
fits <- fits[keep]
loos <- loos[keep]
## stacking weights
weights <- loo_model_weights(
loos,
method = weighting
)
## posterior draws from each model
draws <- lapply(
fits,
function(fit){
d <- as_draws_df(fit)
d <- d[
,
grepl("^b_", names(d)),
drop = FALSE
]
d
}
)
## union of all parameters
pars <- Reduce(
union,
lapply(draws, names)
)
## make all models contain the same parameters
for(i in seq_along(draws)){
missing <- setdiff(
pars,
names(draws[[i]])
)
if(length(missing) > 0){
for(p in missing)
draws[[i]][[p]] <- 0
}
draws[[i]] <- draws[[i]][, pars]
}
## weighted posterior draws
avg_draws <- draws[[1]]
avg_draws[,] <- 0
for(i in seq_along(draws)){
avg_draws <- avg_draws +
weights[i] * draws[[i]]
}
## summarize posterior
forest <- data.frame(
parameter = pars,
Estimate = apply(
avg_draws,
2,
median
),
Q2.5 = apply(
avg_draws,
2,
quantile,
.025
),
Q97.5 = apply(
avg_draws,
2,
quantile,
.975
)
)
## remove intercepts
forest <- subset(
forest,
!grepl("Intercept", parameter)
)
## convert to odds ratios
forest$Estimate <- exp(forest$Estimate)
forest$Q2.5 <- exp(forest$Q2.5)
forest$Q97.5 <- exp(forest$Q97.5)
## parse names
forest$parameter <- sub(
"^b_",
"",
forest$parameter
)
forest$behavior <- sub(
"_.*",
"",
forest$parameter
)
forest$term <- sub(
"^[^_]+_",
"",
forest$parameter
)
## transparency
forest$alpha <- ifelse(
forest$Q2.5 <= 1 &
forest$Q97.5 >= 1,
0.4,
1
)
## plot
ggplot(
forest,
aes(
x = Estimate,
y = reorder(term, Estimate)
)
) +
geom_vline(
xintercept = 1,
linetype = 2
) +
geom_pointrange(
aes(
xmin = Q2.5,
xmax = Q97.5,
alpha = alpha
)
) +
scale_alpha_identity() +
facet_wrap(
~behavior,
scales = "free_y"
) +
theme_classic() +
labs(
x = "Model-averaged odds ratio",
y = NULL
)
}# read data
substrate <- read_excel("./data/raw/data_v02.xlsx", sheet = "Substrate")
# convert tibble to data.frame
substrate <- as.data.frame(substrate)
# prepare data
# reshape data into long format
substrate_long <- reshape(substrate, varying = grep("^Substrate_",
names(substrate)), v.names = "substrate", timevar = "timepoint",
times = names(substrate)[grep("^Substrate_", names(substrate))],
direction = "long")
names(substrate_long)[ncol(substrate_long)] <- "Individual"
# 4. extract day
substrate_long$day <- as.numeric(sub(".*day-([0-9]+).*", "\\1", substrate_long$timepoint))
# 5. extract phase
substrate_long$phase <- sub("Substrate_([^_]+)_.*", "\\1", substrate_long$timepoint)
# 6. extract time of day
substrate_long$time <- sub(".*_([0-9]+AM|[0-9]+PM)$", "\\1", substrate_long$timepoint)
# 7. convert variables
substrate_long$substrate <- factor(substrate_long$substrate)
substrate_long$phase <- factor(substrate_long$phase)
substrate_long$time <- factor(substrate_long$time)
substrate_long$Individual <- factor(substrate_long$Individual)
# clean substrate category names
substrate_long$substrate <- make.names(substrate_long$substrate)
# convert to factor
substrate_long$substrate <- as.character(substrate_long$substrate)
substrate_long$substrate <- trimws(substrate_long$substrate)
substrate_long$substrate <- gsub("[^A-Za-z0-9_]", "_", substrate_long$substrate)
# remove NAs
substrate_long <- substrate_long[substrate_long$substrate != "NA_",
]
substrate_long$substrate <- factor(substrate_long$substrate, levels = unique(substrate_long$substrate))
names(substrate_long) <- gsub("treatment_", "", tolower(names(substrate_long)))
names(substrate_long)[names(substrate_long) == "substrate"] <- "preferred_substrate"Substrate use was analyzed using Bayesian categorical generalized linear mixed models
The response variable was preferred substrate, modeled as a categorical response.
The data were formatted so that each row represented a unique individual × sampling period observation.
Individual identity was included as a random intercept to account for repeated observations of the same individual through time.
Candidate fixed effects included:
sampling_periodfood_substrateleg_conditionsensory_leg_missingphaseA candidate-model approach was used to evaluate competing biological hypotheses.
Candidate models included:
sampling_period,food_substrate, leg_condition, sensory_leg_missing, and phase without sampling_period,sampling_period,sampling_period and every combination of the four biological predictors.In total, 47 competing models were fitted:
sampling_period,sampling_period,sampling_period,sampling_period.Default weakly informative priors implemented in brms were used.
Competing models were compared using approximate leave-one-out cross-validation (LOO), and support for alternative hypotheses was evaluated using differences in expected log predictive density (Δelpd; the model with the highest elpd is considered the best-supported model).
substrate_long$individual <- factor(substrate_long$individual)
substrate_long$food_substrate <- factor(substrate_long$food_substrate)
substrate_long$leg_condition <- factor(substrate_long$leg_condition)
substrate_long$sensory_leg_missing <- factor(substrate_long$sensory_leg_missing)
# convert time: 8AM _> 8, 2PM -> 14, 7PM -> 19
substrate_long$time <- sapply(as.character(substrate_long$time), function(x) switch(x,
`8AM` = 8, `2PM` = 14, `7PM` = 19))
substrate_long <- substrate_long[order(substrate_long$individual,
substrate_long$day, as.numeric(substrate_long$time)), ]
# a continous number for the sequence of sampling events
substrate_long$sampling_period <- 1
for (i in 2:nrow(substrate_long)) {
if (substrate_long$individual[i] == substrate_long$individual[i -
1]) {
substrate_long$sampling_period[i] <- substrate_long$sampling_period[i -
1] + 1
} else {
substrate_long$sampling_period[i] <- 1
}
}
# collapse levels
substrate_long$preferred_substrate <- ifelse(substrate_long$preferred_substrate %in%
c("rock", "sand"), substrate_long$preferred_substrate, "other_substrate")# predictors to evaluate
# predictors that may interact with day
# predictors that may interact with sampling_period
preds <- c(
"food_substrate",
"leg_condition",
"sensory_leg_missing",
"phase"
)
# generate all non-empty subsets
subsets <- unlist(
lapply(
seq_along(preds),
function(k) combn(preds, k, simplify = FALSE)
),
recursive = FALSE
)
# initialize formula list
formulas <- list()
# null model
formulas[["null"]] <-
preferred_substrate ~ (1 | individual)
# sampling period only
formulas[["sampling_period_only"]] <-
preferred_substrate ~ sampling_period + (1 | individual)
# generate models
for (s in subsets) {
pred_string <- paste(s, collapse = " + ")
## additive model (no sampling period)
formulas[[paste0(
"add_noperiod_",
paste(s, collapse = "_")
)]] <-
as.formula(
paste(
"preferred_substrate ~",
pred_string,
"+ (1 | individual)"
)
)
## additive model (with sampling period)
formulas[[paste0(
"add_",
paste(s, collapse = "_")
)]] <-
as.formula(
paste(
"preferred_substrate ~ sampling_period +",
pred_string,
"+ (1 | individual)"
)
)
## interaction model
formulas[[paste0(
"int_",
paste(s, collapse = "_")
)]] <-
as.formula(
paste(
"preferred_substrate ~ sampling_period * (",
pred_string,
") + (1 | individual)"
)
)
}
# fit all models
fits <- vector("list", length(formulas))
for (i in seq_along(formulas)) {
model_name <- names(formulas)[i]
cat(
"\n=====================================\n",
"Fitting model", i, "of", length(formulas), "\n",
model_name, "\n",
as.character(formulas[i]), "\n",
"=====================================\n"
)
fits[[i]] <- brm(
formula = formulas[[i]],
family = categorical(),
data = substrate_long,
chains = 1,
cores = 4,
iter = 4000,
control = list(
adapt_delta = 0.99,
max_treedepth = 15
),
file = paste0(
"./data/processed/fits_substrate/",
model_name,
".rds"
),
file_refit = "on_change"
)
}
names(fits) <- names(formulas)
saveRDS(fits, file = "./data/processed/fit_list_substrate.rds")
# compare models
loos <- lapply(fits, loo)
saveRDS(loos, file = "./data/processed/loo_list_substrate.rds")
global_fit <- substrate_fits[["add_food_substrate_leg_condition_sensory_leg_missing_phase"]]
saveRDS(global_fit, file = "./data/processed/global_model_fit_substrate.rds")loos <- readRDS("./data/processed/loo_list_substrate.rds")
loo_comp <- loo_compare(loos)Warning: Difference in performance potentially due to chance.See McLatchie and
Vehtari (2023) for details.
loo_comp <- as.data.frame(loo_comp)[, c("elpd_diff", "se_diff")]
loo_comp$model <- row.names(loo_comp)
kable(loo_comp[, c("model", "elpd_diff", "se_diff")], digits = 3,
col.names = c("model", "Δelpd", "SD elpd"), row.names = FALSE)| model | Δelpd | SD elpd |
|---|---|---|
| add_food_substrate_phase | 0.000 | 0.000 |
| add_food_substrate_sensory_leg_missing_phase | -0.815 | 0.661 |
| add_food_substrate_leg_condition_phase | -0.826 | 0.943 |
| add_food_substrate_leg_condition_sensory_leg_missing_phase | -1.547 | 1.284 |
| add_food_substrate | -1.810 | 2.822 |
| int_food_substrate_leg_condition | -2.384 | 4.157 |
| add_noperiod_food_substrate_phase | -2.449 | 2.809 |
| int_food_substrate | -2.587 | 3.325 |
| add_food_substrate_leg_condition_sensory_leg_missing | -2.640 | 3.061 |
| add_noperiod_food_substrate_leg_condition_phase | -2.962 | 2.922 |
| add_food_substrate_sensory_leg_missing | -2.965 | 2.887 |
| int_food_substrate_leg_condition_phase | -3.051 | 3.084 |
| add_noperiod_food_substrate_sensory_leg_missing_phase | -3.107 | 2.851 |
| int_food_substrate_phase | -3.374 | 1.778 |
| add_noperiod_food_substrate_leg_condition_sensory_leg_missing_phase | -4.129 | 3.029 |
| add_food_substrate_leg_condition | -4.183 | 2.960 |
| int_food_substrate_leg_condition_sensory_leg_missing_phase | -5.327 | 3.300 |
| int_food_substrate_sensory_leg_missing_phase | -5.562 | 2.095 |
| int_food_substrate_leg_condition_sensory_leg_missing | -5.851 | 4.323 |
| int_food_substrate_sensory_leg_missing | -6.063 | 3.515 |
| add_phase | -7.898 | 3.977 |
| add_sensory_leg_missing_phase | -8.206 | 4.044 |
| add_leg_condition_phase | -8.335 | 4.078 |
| add_leg_condition_sensory_leg_missing_phase | -8.341 | 4.186 |
| add_noperiod_food_substrate_leg_condition | -9.153 | 5.019 |
| add_noperiod_food_substrate | -9.336 | 4.956 |
| add_leg_condition | -9.571 | 5.049 |
| add_noperiod_phase | -9.678 | 4.799 |
| sampling_period_only | -9.682 | 4.990 |
| int_leg_condition_phase | -9.801 | 4.720 |
| add_noperiod_sensory_leg_missing_phase | -9.853 | 4.851 |
| int_phase | -9.934 | 4.046 |
| int_leg_condition | -9.940 | 5.507 |
| add_noperiod_food_substrate_leg_condition_sensory_leg_missing | -10.242 | 5.078 |
| add_noperiod_leg_condition_sensory_leg_missing_phase | -10.258 | 4.933 |
| add_sensory_leg_missing | -10.280 | 5.010 |
| add_noperiod_leg_condition_phase | -10.642 | 4.844 |
| add_noperiod_food_substrate_sensory_leg_missing | -10.792 | 4.986 |
| add_leg_condition_sensory_leg_missing | -10.969 | 5.144 |
| int_leg_condition_sensory_leg_missing | -11.119 | 5.742 |
| int_sensory_leg_missing | -11.447 | 5.067 |
| int_sensory_leg_missing_phase | -11.529 | 4.164 |
| int_leg_condition_sensory_leg_missing_phase | -11.880 | 4.916 |
| null | -16.659 | 6.403 |
| add_noperiod_leg_condition | -17.552 | 6.430 |
| add_noperiod_sensory_leg_missing | -17.928 | 6.407 |
| add_noperiod_leg_condition_sensory_leg_missing | -18.170 | 6.494 |
food_substrate.food_substrate perform substantially worse.global_fit <- readRDS("./data/processed/global_model_fit_substrate.rds")
pred_time <- avg_predictions(model = global_fit, newdata = datagrid(sampling_period = unique(substrate_long$sampling_period),
food_substrate = unique(substrate_long$food_substrate), phase = unique(substrate_long$phase),
leg_condition = unique(substrate_long$leg_condition), sensory_leg_missing = unique(substrate_long$sensory_leg_missing)),
type = "response", variables = c("sampling_period", "food_substrate"))
gg_sub1 <- ggplot(pred_time, aes(x = sampling_period, y = estimate,
colour = group, group = group)) + geom_line(linewidth = 1) + geom_point(size = 2) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1) +
labs(x = "Sampling period", y = "Predicted probability", colour = "Substrate") +
facet_grid(~food_substrate) + theme_paper()
saveRDS(gg_sub1, "./data/processed/gg_sub1.RDS")gg_sub1 <- readRDS("./data/processed/gg_sub1.RDS")
gg_sub1food_substrate and phase.phase consistently improves predictive performance relative to models containing only food_substrate.pred_time <- avg_predictions(model = global_fit, newdata = datagrid(sampling_period = unique(substrate_long$sampling_period),
food_substrate = unique(substrate_long$food_substrate), phase = unique(substrate_long$phase),
leg_condition = unique(substrate_long$leg_condition), sensory_leg_missing = unique(substrate_long$sensory_leg_missing)),
type = "response", variables = c("sampling_period", "phase", "food_substrate"))
gg_sub2 <- ggplot(pred_time, aes(x = sampling_period, y = estimate,
colour = group, group = group)) + geom_line(linewidth = 1) + geom_point(size = 2) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1) +
labs(x = "Sampling period", y = "Predicted probability", colour = "Substrate") +
facet_grid(food_substrate ~ phase) + theme_paper()
saveRDS(gg_sub2, "./data/processed/gg_sub2.RDS")gg_sub2 <- readRDS("./data/processed/gg_sub2.RDS")
gg_sub2food_substrate and phase.pred_time <- avg_predictions(model = global_fit, newdata = datagrid(sampling_period = unique(substrate_long$sampling_period),
food_substrate = unique(substrate_long$food_substrate), phase = unique(substrate_long$phase),
leg_condition = unique(substrate_long$leg_condition), sensory_leg_missing = unique(substrate_long$sensory_leg_missing)),
type = "response", variables = c("sampling_period", "phase", "food_substrate",
"sensory_leg_missing"))
ggsub3 <- ggplot(pred_time, aes(x = sampling_period, y = estimate,
colour = group, group = group)) + geom_line(linewidth = 1) + geom_point(size = 2) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.1) +
labs(x = "Sampling period", y = "Predicted probability", colour = "Substrate") +
facet_grid(food_substrate ~ phase + sensory_leg_missing) + theme_paper()
saveRDS(ggsub3, "./data/processed/gg_sub3.RDS")gg_sub3 <- readRDS("./data/processed/gg_sub3.RDS")
gg_sub3top_models <- data.frame(Model = rownames(loo_comp)[1:10], loo_comp[1:10,
])
kable(top_models, digits = 1, row.names = FALSE)| Model | elpd_diff | se_diff | model |
|---|---|---|---|
| add_food_substrate_phase | 0.0 | 0.0 | add_food_substrate_phase |
| add_food_substrate_sensory_leg_missing_phase | -0.8 | 0.7 | add_food_substrate_sensory_leg_missing_phase |
| add_food_substrate_leg_condition_phase | -0.8 | 0.9 | add_food_substrate_leg_condition_phase |
| add_food_substrate_leg_condition_sensory_leg_missing_phase | -1.5 | 1.3 | add_food_substrate_leg_condition_sensory_leg_missing_phase |
| add_food_substrate | -1.8 | 2.8 | add_food_substrate |
| int_food_substrate_leg_condition | -2.4 | 4.2 | int_food_substrate_leg_condition |
| add_noperiod_food_substrate_phase | -2.4 | 2.8 | add_noperiod_food_substrate_phase |
| int_food_substrate | -2.6 | 3.3 | int_food_substrate |
| add_food_substrate_leg_condition_sensory_leg_missing | -2.6 | 3.1 | add_food_substrate_leg_condition_sensory_leg_missing |
| add_noperiod_food_substrate_leg_condition_phase | -3.0 | 2.9 | add_noperiod_food_substrate_leg_condition_phase |
food_substrate.phase.leg_condition or sensory_leg_missing substantially improves predictive performance.To evaluate the importance of temporal structure, models including sampling_period can be compared directly with their corresponding models excluding sampling_period.
time_comp <- data.frame(Comparison = c("food_substrate + phase", "food_substrate + leg_condition + phase",
"food_substrate + sensory_leg_missing + phase", "food_substrate + leg_condition + sensory_leg_missing + phase",
"food_substrate"), With_sampling_period = c("add_food_substrate_phase",
"add_food_substrate_leg_condition_phase", "add_food_substrate_sensory_leg_missing_phase",
"add_food_substrate_leg_condition_sensory_leg_missing_phase",
"add_food_substrate"), Without_sampling_period = c("add_noperiod_food_substrate_phase",
"add_noperiod_food_substrate_leg_condition_phase", "add_noperiod_food_substrate_sensory_leg_missing_phase",
"add_noperiod_food_substrate_leg_condition_sensory_leg_missing_phase",
"add_noperiod_food_substrate"))
time_comp$Delta_elpd <- NA
for (i in seq_len(nrow(time_comp))) {
with <- loo_comp[rownames(loo_comp) == time_comp$With_sampling_period[i],
"elpd_diff"]
without <- loo_comp[rownames(loo_comp) == time_comp$Without_sampling_period[i],
"elpd_diff"]
time_comp$Delta_elpd[i] <- without - with
}
kable(time_comp[, c("Comparison", "Delta_elpd")], digits = 1, col.names = c("Model",
expression(Delta * "elpd from adding sampling_period")), row.names = FALSE)| Model | Delta * “elpd from adding sampling_period” |
|---|---|
| food_substrate + phase | -2.4 |
| food_substrate + leg_condition + phase | -2.1 |
| food_substrate + sensory_leg_missing + phase | -2.3 |
| food_substrate + leg_condition + sensory_leg_missing + phase | -2.6 |
| food_substrate | -7.5 |
sampling_period consistently improves predictive performance.phase is included in the model.phase is omitted.These results suggest that:
fits_substrate <- readRDS("./data/processed/fit_list_substrate.rds")
loos <- readRDS("./data/processed/loo_list_substrate.rds")
# extract fixed effects
pmaf_sub <- plot_model_average_forest(fits_substrate, loos, delta = 2)
saveRDS(pmaf_sub, "./data/processed/pmaf_sub.RDS")# pmaf_sub <- readRDS('./data/processed/pmaf_sub.RDS') pmaf_subBehavioral responses were analyzed using Bayesian multivariate generalized linear mixed models
The response consisted of a multivariate matrix of five binary behavioral variables:
FLEEESCAPEBOBBINGCHEMICAL_RELEASEFREEZEEach behavior was modeled using a Bernoulli distribution with a logit link.
The data were reformatted so that each row represented a unique individual × sampling period combination, with the five behaviors forming the multivariate response.
Individual identity was included as a random intercept to account for repeated observations of the same individual through time.
Candidate fixed effects included:
sampling_periodTreatment_food_substrateLeg_conditionSensory_leg_missingA candidate-model approach was used to evaluate competing biological hypotheses.
Candidate models included:
sampling_period,sampling_period,sampling_period and each combination of biological predictors.In total, 23 competing models were fitted.
Default weakly informative priors implemented in brms were used.
Models were compared using approximate leave-one-out cross-validation (LOO), and support for competing hypotheses was evaluated using differences in expected log predictive density (Δelpd).
behavior <- read_excel("./data/raw/data_v02.xlsx", sheet = "behavior")
# convert tibble to data.frame
behavior <- as.data.frame(behavior)
# convert metadata variables
behavior$Individual <- factor(behavior$Individual)
behavior$Treatment_food_substrate <- factor(behavior$Treatment_food_substrate)
behavior$Leg_condition <- factor(behavior$Leg_condition)
behavior$Sensory_leg_missing <- factor(behavior$Sensory_leg_missing)
# identify metadata columns
meta_cols <- names(behavior)[
1:match("Sensory_leg_missing", names(behavior))
]
# identify behavior columns
behavior_cols <- names(behavior)[
(match("Sensory_leg_missing", names(behavior)) + 1):ncol(behavior)
]
# extract sampling periods
sampling_periods <- unique(
sub("_.*", "", behavior_cols)
)
# initialize list
behavior_list <- vector(
"list",
length(sampling_periods)
)
names(behavior_list) <- sampling_periods
# reshape to one row per individual × sampling period
for (i in seq_along(sampling_periods)) {
sp <- sampling_periods[i]
cols <- grep(
paste0("^", sp, "_"),
names(behavior),
value = TRUE
)
dat <- behavior[, c(meta_cols, cols)]
names(dat)[
(length(meta_cols) + 1):ncol(dat)
] <- sub(
paste0("^", sp, "_"),
"",
cols
)
beh_cols <- names(dat)[
(length(meta_cols) + 1):ncol(dat)
]
for (b in beh_cols) {
dat[[b]] <- ifelse(
tolower(trimws(dat[[b]])) == "no",
0,
1
)
dat[[b]] <- as.integer(dat[[b]])
}
dat$sampling_period <- sp
behavior_list[[i]] <- dat
}
# combine sampling periods
behavior_long <- do.call(
rbind,
behavior_list
)
# convert sampling period
behavior_long <- behavior_long[order(behavior_long$Individual, behavior_long$sampling_period), ]
behavior_long$sampling_period <- factor(
behavior_long$sampling_period,
levels = c(
"1A","1B",
"2A","2B",
"3A","3B",
"4A","4B",
"5A","5B"
),
ordered = TRUE
)
behavior_long$sampling_period <- 1
for (i in 2:nrow(behavior_long)) {
if (behavior_long$Individual[i] == behavior_long$Individual[i - 1]) {
behavior_long$sampling_period[i] <- behavior_long$sampling_period[i - 1] + 1
} else {
behavior_long$sampling_period[i] <- 1
}
}
# response variables
behaviors <- c(
"FLEE",
"ESCAPE",
"BOBBING",
"CHEMICAL_RELEASE",
"FREEZE"
)
# candidate predictors
preds <- c(
"Treatment_food_substrate",
"Leg_condition",
"Sensory_leg_missing"
)
# generate all predictor subsets
subsets <- unlist(
lapply(
seq_along(preds),
function(k) combn(preds, k, simplify = FALSE)
),
recursive = FALSE
)
# initialize formula list
formulas <- list()
# null model
formulas[["null"]] <-
"~ 1 + (1 | Individual)"
# sampling period only
formulas[["sampling_period_only"]] <-
"~ sampling_period + (1 | Individual)"
# generate candidate models
for (s in subsets) {
pred_string <- paste(s, collapse = " + ")
formulas[[paste0(
"add_noperiod_",
paste(s, collapse = "_")
)]] <-
paste(
"~",
pred_string,
"+ (1 | Individual)"
)
formulas[[paste0(
"add_",
paste(s, collapse = "_")
)]] <-
paste(
"~ sampling_period +",
pred_string,
"+ (1 | Individual)"
)
formulas[[paste0(
"int_",
paste(s, collapse = "_")
)]] <-
paste(
"~ sampling_period * (",
pred_string,
") + (1 | Individual)"
)
}
# fit models
fits_behavior <- vector(
"list",
length(formulas)
)
for (i in seq_along(formulas)) {
model_name <- names(formulas)[i]
cat(
"\n=====================================\n",
"Fitting model", i, "of", length(formulas), "\n",
model_name, "\n",
"=====================================\n"
)
# build multivariate formula
mv_formula <-
Reduce(
`+`,
lapply(
behaviors,
function(b) {
bf(
as.formula(
paste(
b,
formulas[[i]]
)
),
family = bernoulli()
)
}
)
) +
set_rescor(FALSE)
fits_behavior[[i]] <- brm(
formula = mv_formula,
data = behavior_long,
chains = 2,
cores = 2,
iter = 4000,
control = list(
adapt_delta = 0.99,
max_treedepth = 15
),
file = paste0(
"./data/processed/fits_behavior/",
model_name,
".rds"
),
file_refit = "on_change"
)
}
names(fits_behavior) <- names(formulas)
saveRDS(
fits_behavior,
file = "./data/processed/fit_list_behavior.rds"
)
saveRDS(
fits_behavior[["add_Treatment_food_substrate_Leg_condition_Sensory_leg_missing"]],
file = "./data/processed/global_model_fit_behavior.rds"
)
# compare models
loo_behavior <- lapply(
fits_behavior,
loo
)
saveRDS(
loo_behavior,
file = "./data/processed/loo_list_behavior.rds"
)loos <- readRDS("./data/processed/loo_list_behavior.rds")
loo_behavior <- loo_compare(loos)
loo_behavior <- as.data.frame(loo_behavior)[, c("elpd_diff", "se_diff")]
loo_behavior$model <- row.names(loo_behavior)
kable(loo_behavior[, c("model", "elpd_diff", "se_diff")], digits = 3,
col.names = c("model", "Δelpd", "SD elpd"), row.names = FALSE)| model | Δelpd | SD elpd |
|---|---|---|
| add_Leg_condition | 0.000 | 0.000 |
| add_Treatment_food_substrate_Leg_condition | -1.196 | 1.653 |
| add_Leg_condition_Sensory_leg_missing | -3.310 | 1.125 |
| add_Sensory_leg_missing | -3.380 | 2.246 |
| add_Treatment_food_substrate_Leg_condition_Sensory_leg_missing | -3.717 | 1.843 |
| sampling_period_only | -3.945 | 2.741 |
| int_Leg_condition | -4.218 | 1.921 |
| add_Treatment_food_substrate | -4.320 | 3.092 |
| int_Sensory_leg_missing | -4.858 | 3.561 |
| add_Treatment_food_substrate_Sensory_leg_missing | -4.922 | 2.616 |
| int_Treatment_food_substrate | -5.920 | 4.182 |
| int_Treatment_food_substrate_Leg_condition | -7.684 | 3.655 |
| int_Leg_condition_Sensory_leg_missing | -8.555 | 3.693 |
| int_Treatment_food_substrate_Sensory_leg_missing | -9.520 | 4.654 |
| int_Treatment_food_substrate_Leg_condition_Sensory_leg_missing | -9.745 | 4.823 |
| add_noperiod_Leg_condition_Sensory_leg_missing | -10.045 | 5.800 |
| add_noperiod_Leg_condition | -10.458 | 5.708 |
| add_noperiod_Treatment_food_substrate_Leg_condition | -11.833 | 5.910 |
| null | -12.349 | 6.194 |
| add_noperiod_Treatment_food_substrate_Leg_condition_Sensory_leg_missing | -12.543 | 6.033 |
| add_noperiod_Treatment_food_substrate | -12.946 | 6.377 |
| add_noperiod_Sensory_leg_missing | -13.350 | 5.990 |
| add_noperiod_Treatment_food_substrate_Sensory_leg_missing | -15.075 | 6.143 |
sampling_period and Leg_condition.Leg_condition as a main effect.sampling_period consistently outperform their corresponding models without sampling_period.global_fit <- readRDS("./data/processed/global_model_fit_behavior.rds")
pred_time <- avg_predictions(model = global_fit, newdata = datagrid(sampling_period = unique(behavior_long$sampling_period),
Leg_condition = unique(behavior_long$Leg_condition), Sensory_leg_missing = unique(behavior_long$Sensory_leg_missing)),
type = "response", variables = c("sampling_period"))
ggbeh1 <- ggplot(pred_time, aes(x = sampling_period, y = estimate,
group = group)) + geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
fill = "grey80", alpha = 0.4, colour = NA) + geom_line(aes(colour = group),
linewidth = 1) + geom_point(aes(colour = group), size = 2) + labs(x = "Sampling period",
y = "Predicted probability", colour = "Substrate") + facet_grid(~group) +
theme_paper() + theme(legend.position = "none")
saveRDS(ggbeh1, "./data/processed/ggbeh1.RDS")ggbeh1 <- readRDS("./data/processed/ggbeh1.RDS")
ggbeh1Leg_condition.pred_time <- avg_predictions(model = global_fit, newdata = datagrid(sampling_period = unique(behavior_long$sampling_period),
Leg_condition = unique(behavior_long$Leg_condition), Sensory_leg_missing = unique(behavior_long$Sensory_leg_missing)),
type = "response", variables = c("sampling_period", "Leg_condition"))
ggbeh2 <- ggplot(pred_time, aes(x = sampling_period, y = estimate,
group = group)) + geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
fill = "grey70", alpha = 0.4, colour = NA) + geom_line(aes(colour = group),
linewidth = 1) + geom_point(aes(colour = group), size = 2) + labs(x = "Sampling period",
y = "Predicted probability", colour = "Substrate") + facet_grid(Leg_condition ~
group) + theme_paper() + theme(legend.position = "none")
saveRDS(ggbeh2, "./data/processed/ggbeh2.RDS")ggbeh2 <- readRDS("./data/processed/ggbeh2.RDS")
ggbeh2pred_time <- avg_predictions(model = global_fit, newdata = datagrid(sampling_period = unique(behavior_long$sampling_period),
Leg_condition = unique(behavior_long$Leg_condition), Sensory_leg_missing = unique(behavior_long$Sensory_leg_missing)),
type = "response", variables = c("sampling_period", "Sensory_leg_missing"))
ggbeh3 <- ggplot(pred_time, aes(x = sampling_period, y = estimate,
group = group)) + geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
fill = "grey70", alpha = 0.4, colour = NA) + geom_line(aes(colour = group),
linewidth = 1) + geom_point(aes(colour = group), size = 2) + labs(x = "Sampling period",
y = "Predicted probability", colour = "Substrate") + facet_grid(Sensory_leg_missing ~
group) + theme_paper() + theme(legend.position = "none")
saveRDS(ggbeh3, "./data/processed/ggbeh3.RDS")ggbeh3 <- readRDS("./data/processed/ggbeh3.RDS")
ggbeh3pred_time <- avg_predictions(model = global_fit, newdata = datagrid(sampling_period = unique(behavior_long$sampling_period),
Leg_condition = unique(behavior_long$Leg_condition), Sensory_leg_missing = unique(behavior_long$Sensory_leg_missing),
Treatment_food_substrate = unique(behavior_long$Treatment_food_substrate)),
type = "response", variables = c("sampling_period", "Treatment_food_substrate"))
ggbeh4 <- ggplot(pred_time, aes(x = sampling_period, y = estimate,
group = group)) + geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
fill = "grey70", alpha = 0.4, colour = NA) + geom_line(aes(colour = group),
linewidth = 1) + geom_point(aes(colour = group), size = 2) + labs(x = "Sampling period",
y = "Predicted probability", colour = "Substrate") + facet_grid(Treatment_food_substrate ~
group) + theme_paper() + theme(legend.position = "none")
saveRDS(ggbeh4, "./data/processed/ggbeh4.RDS")ggbeh4 <- readRDS("./data/processed/ggbeh4.RDS")
ggbeh4sampling_period and either Treatment_food_substrate or Sensory_leg_missing perform substantially worse than additive models.sampling_period × Leg_condition.library(knitr)
top_models <- data.frame(Model = rownames(loo_behavior)[1:10], loo_behavior[1:10,
])
kable(top_models, digits = 1, row.names = FALSE)| Model | elpd_diff | se_diff | model |
|---|---|---|---|
| add_Leg_condition | 0.0 | 0.0 | add_Leg_condition |
| add_Treatment_food_substrate_Leg_condition | -1.2 | 1.7 | add_Treatment_food_substrate_Leg_condition |
| add_Leg_condition_Sensory_leg_missing | -3.3 | 1.1 | add_Leg_condition_Sensory_leg_missing |
| add_Sensory_leg_missing | -3.4 | 2.2 | add_Sensory_leg_missing |
| add_Treatment_food_substrate_Leg_condition_Sensory_leg_missing | -3.7 | 1.8 | add_Treatment_food_substrate_Leg_condition_Sensory_leg_missing |
| sampling_period_only | -3.9 | 2.7 | sampling_period_only |
| int_Leg_condition | -4.2 | 1.9 | int_Leg_condition |
| add_Treatment_food_substrate | -4.3 | 3.1 | add_Treatment_food_substrate |
| int_Sensory_leg_missing | -4.9 | 3.6 | int_Sensory_leg_missing |
| add_Treatment_food_substrate_Sensory_leg_missing | -4.9 | 2.6 | add_Treatment_food_substrate_Sensory_leg_missing |
Leg_condition.Treatment_food_substrate and/or Sensory_leg_missing.Temporal effects can be evaluated by comparing models that differ only by the inclusion of sampling_period.
time_comp <- data.frame(Comparison = c("Leg_condition", "Treatment_food_substrate + Leg_condition",
"Treatment_food_substrate + Leg_condition + Sensory_leg_missing",
"Treatment_food_substrate", "Sensory_leg_missing"), With_sampling_period = c("add_Leg_condition",
"add_Treatment_food_substrate_Leg_condition", "add_Treatment_food_substrate_Leg_condition_Sensory_leg_missing",
"add_Treatment_food_substrate", "add_Sensory_leg_missing"), Without_sampling_period = c("add_noperiod_Leg_condition",
"add_noperiod_Treatment_food_substrate_Leg_condition", "add_noperiod_Treatment_food_substrate_Leg_condition_Sensory_leg_missing",
"add_noperiod_Treatment_food_substrate", "add_noperiod_Sensory_leg_missing"))
time_comp$Delta_elpd <- NA
for (i in seq_len(nrow(time_comp))) {
with <- loo_behavior[rownames(loo_behavior) == time_comp$With_sampling_period[i],
"elpd_diff"]
without <- loo_behavior[rownames(loo_behavior) == time_comp$Without_sampling_period[i],
"elpd_diff"]
time_comp$Delta_elpd[i] <- without - with
}
kable(time_comp[, c("Comparison", "Delta_elpd")], digits = 1, col.names = c("Model",
"Δelpd from adding sampling_period"), row.names = FALSE)| Model | Δelpd from adding sampling_period |
|---|---|
| Leg_condition | -10.5 |
| Treatment_food_substrate + Leg_condition | -10.6 |
| Treatment_food_substrate + Leg_condition + Sensory_leg_missing | -8.8 |
| Treatment_food_substrate | -8.6 |
| Sensory_leg_missing | -10.0 |
sampling_period consistently improves predictive performance across all candidate models.interaction_comp <- data.frame(Comparison = c("Leg_condition", "Treatment_food_substrate",
"Sensory_leg_missing"), Additive = c("add_Leg_condition", "add_Treatment_food_substrate",
"add_Sensory_leg_missing"), Interaction = c("int_Leg_condition",
"int_Treatment_food_substrate", "int_Sensory_leg_missing"))
interaction_comp$Delta_elpd <- NA
for (i in seq_len(nrow(interaction_comp))) {
add <- loo_behavior[rownames(loo_behavior) == interaction_comp$Additive[i],
"elpd_diff"]
int <- loo_behavior[rownames(loo_behavior) == interaction_comp$Interaction[i],
"elpd_diff"]
interaction_comp$Delta_elpd[i] <- int - add
}
kable(interaction_comp, digits = 1, col.names = c("Predictor", "Additive model",
"Interaction model", "Δelpd"), row.names = FALSE)| Predictor | Additive model | Interaction model | Δelpd |
|---|---|---|---|
| Leg_condition | add_Leg_condition | int_Leg_condition | -4.2 |
| Treatment_food_substrate | add_Treatment_food_substrate | int_Treatment_food_substrate | -1.6 |
| Sensory_leg_missing | add_Sensory_leg_missing | int_Sensory_leg_missing | -1.5 |
Leg_condition to vary across sampling periods improves predictive performance.Treatment_food_substrate or Sensory_leg_missing reduce predictive performance, indicating that these additional complexities are not supported by the data.Model-averaged odds ratios (median posterior estimates ± 95% credible intervals) for predictors of five behavioral responses obtained by Bayesian model averaging across all candidate models within Δelpd ≤ 2 of the best-supported model. Odds ratios greater than one indicate an increased probability of exhibiting a given behavior, whereas values less than one indicate a decreased probability relative to the reference category. Effects whose 95% credible intervals exclude one are shown with full opacity, whereas effects whose credible intervals overlap one (e.g. “non-significant”) are displayed with reduced transparency, highlighting predictors for which the evidence is weaker. Only predictors retained in the set of best-supported models are displayed, such that the figure summarizes the effects receiving the strongest support after accounting for model-selection uncertainty.
fits_behavior <- readRDS("./data/processed/fit_list_behavior.rds")
loos <- readRDS("./data/processed/loo_list_behavior.rds")
# extract fixed effects
pmaf_beh <- plot_model_average_forest(fits_behavior, loos, delta = 2)
saveRDS(pmaf_beh, "./data/processed/pmaf_beh.RDS")# pmaf_beh <- readRDS('./data/processed/pmaf_beh.RDS') pmaf_beh─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.5.2 (2025-10-31)
os Ubuntu 22.04.4 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Costa_Rica
date 2026-06-29
pandoc 3.6.3 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
quarto 1.8.25 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/quarto
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
abind 1.4-8 2024-09-12 [1] CRAN (R 4.5.2)
ape 5.8-1 2024-12-16 [1] CRAN (R 4.5.2)
arrayhelpers 1.1-0 2020-02-04 [1] CRAN (R 4.5.2)
backports 1.5.1 2026-04-03 [1] CRAN (R 4.5.2)
bayesplot 1.15.0 2025-12-12 [1] CRAN (R 4.5.2)
bridgesampling 1.2-1 2025-11-19 [1] CRAN (R 4.5.2)
brms * 2.23.0 2025-09-09 [1] CRAN (R 4.5.2)
brmsish * 1.0.0 2026-03-03 [1] Github (maRce10/brmsish@81ab826)
Brobdingnag 1.2-9 2022-10-19 [1] CRAN (R 4.5.2)
cachem 1.1.0 2024-05-16 [1] CRAN (R 4.5.2)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.5.2)
checkmate 2.3.4 2026-02-03 [1] CRAN (R 4.5.2)
cli 3.6.6 2026-04-09 [1] CRAN (R 4.5.2)
coda 0.19-4.1 2024-01-31 [1] CRAN (R 4.5.2)
codetools 0.2-20 2024-03-31 [1] CRAN (R 4.5.2)
cowplot 1.2.0 2025-07-07 [1] CRAN (R 4.5.2)
crayon 1.5.3 2024-06-20 [1] CRAN (R 4.5.2)
curl 7.0.0 2025-08-19 [1] CRAN (R 4.5.2)
data.table 1.18.2.1 2026-01-27 [1] CRAN (R 4.5.2)
devtools 2.4.6 2025-10-03 [1] CRAN (R 4.5.2)
digest 0.6.39 2025-11-19 [1] CRAN (R 4.5.2)
distributional 0.6.0 2026-01-14 [1] CRAN (R 4.5.2)
dplyr 1.2.1 2026-04-03 [1] CRAN (R 4.5.2)
ellipsis 0.3.2 2021-04-29 [3] CRAN (R 4.1.1)
evaluate 1.0.5 2025-08-27 [1] CRAN (R 4.5.2)
farver 2.1.2 2024-05-13 [1] CRAN (R 4.5.2)
fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.5.2)
formatR * 1.14 2023-01-17 [1] CRAN (R 4.5.2)
fs 2.0.1 2026-03-24 [1] CRAN (R 4.5.2)
generics 0.1.4 2025-05-09 [1] CRAN (R 4.5.2)
ggdist 3.3.3 2025-04-23 [1] CRAN (R 4.5.2)
ggplot2 * 4.0.2 2026-02-03 [1] CRAN (R 4.5.2)
glue 1.8.0 2024-09-30 [1] CRAN (R 4.5.2)
gridExtra 2.3 2017-09-09 [3] CRAN (R 4.0.1)
gtable 0.3.6 2024-10-25 [1] CRAN (R 4.5.2)
htmltools 0.5.9 2025-12-04 [1] CRAN (R 4.5.2)
htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.5.2)
inline 0.3.21 2025-01-09 [1] CRAN (R 4.5.2)
jsonlite 2.0.0 2025-03-27 [1] CRAN (R 4.5.2)
kableExtra 1.4.0 2024-01-24 [1] CRAN (R 4.5.2)
knitr * 1.51 2025-12-20 [1] CRAN (R 4.5.2)
labeling 0.4.3 2023-08-29 [1] CRAN (R 4.5.2)
lattice 0.22-9 2026-02-09 [1] CRAN (R 4.5.2)
lifecycle 1.0.5 2026-01-08 [1] CRAN (R 4.5.2)
loo * 2.9.0 2025-12-23 [1] CRAN (R 4.5.2)
magrittr 2.0.5 2026-04-04 [1] CRAN (R 4.5.2)
marginaleffects * 0.32.0 2026-02-14 [1] CRAN (R 4.5.2)
Matrix 1.7-4 2025-08-28 [1] CRAN (R 4.5.2)
matrixStats 1.5.0 2025-01-07 [1] CRAN (R 4.5.2)
memoise 2.0.1 2021-11-26 [3] CRAN (R 4.1.2)
mvtnorm 1.3-3 2025-01-10 [1] CRAN (R 4.5.2)
nlme 3.1-168 2025-03-31 [1] CRAN (R 4.5.2)
otel 0.2.0 2025-08-29 [1] CRAN (R 4.5.2)
packrat 0.9.3 2025-06-16 [1] CRAN (R 4.5.2)
pbapply 1.7-4 2025-07-20 [1] CRAN (R 4.5.2)
pillar 1.11.1 2025-09-17 [1] CRAN (R 4.5.2)
pkgbuild 1.4.8 2025-05-26 [1] CRAN (R 4.5.2)
pkgconfig 2.0.3 2019-09-22 [3] CRAN (R 4.0.1)
pkgload 1.5.1 2026-04-01 [1] CRAN (R 4.5.2)
posterior * 1.6.1 2025-02-27 [1] CRAN (R 4.5.2)
purrr 1.2.2 2026-04-10 [1] CRAN (R 4.5.2)
QuickJSR 1.9.0 2026-01-25 [1] CRAN (R 4.5.2)
R6 2.6.1 2025-02-15 [1] CRAN (R 4.5.2)
RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.5.2)
Rcpp * 1.1.1 2026-01-10 [1] CRAN (R 4.5.2)
RcppParallel 5.1.11-2 2026-03-05 [1] CRAN (R 4.5.2)
readxl * 1.4.5 2025-03-07 [1] CRAN (R 4.5.2)
remotes 2.5.0 2024-03-17 [1] CRAN (R 4.5.2)
rlang 1.2.0 2026-04-06 [1] CRAN (R 4.5.2)
rmarkdown 2.31 2026-03-26 [1] CRAN (R 4.5.2)
rstan 2.32.7 2025-03-10 [1] CRAN (R 4.5.2)
rstantools 2.6.0 2026-01-10 [1] CRAN (R 4.5.2)
rstudioapi 0.18.0 2026-01-16 [1] CRAN (R 4.5.2)
S7 0.2.1 2025-11-14 [1] CRAN (R 4.5.2)
scales 1.4.0 2025-04-24 [1] CRAN (R 4.5.2)
sessioninfo 1.2.3 2025-02-05 [1] CRAN (R 4.5.2)
sketchy 1.0.7 2026-03-03 [1] CRANs (R 4.5.2)
StanHeaders 2.32.10 2024-07-15 [1] CRAN (R 4.5.2)
stringi 1.8.7 2025-03-27 [1] CRAN (R 4.5.2)
stringr 1.6.0 2025-11-04 [1] CRAN (R 4.5.2)
svglite 2.2.2 2025-10-21 [1] CRAN (R 4.5.2)
svUnit 1.0.8 2025-08-26 [1] CRAN (R 4.5.2)
systemfonts 1.3.1 2025-10-01 [1] CRAN (R 4.5.2)
tensorA 0.36.2.1 2023-12-13 [1] CRAN (R 4.5.2)
textshaping 1.0.4 2025-10-10 [1] CRAN (R 4.5.2)
tibble 3.3.1 2026-01-11 [1] CRAN (R 4.5.2)
tidybayes 3.0.7 2024-09-15 [1] CRAN (R 4.5.2)
tidyr 1.3.2 2025-12-19 [1] CRAN (R 4.5.2)
tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.5.2)
usethis 3.2.1 2025-09-06 [1] CRAN (R 4.5.2)
V8 8.2.0 2026-04-21 [1] CRAN (R 4.5.2)
vctrs 0.7.3 2026-04-11 [1] CRAN (R 4.5.2)
viridis * 0.6.5 2024-01-29 [1] CRAN (R 4.5.2)
viridisLite * 0.4.3 2026-02-04 [1] CRAN (R 4.5.2)
withr 3.0.2 2024-10-28 [1] CRAN (R 4.5.2)
xaringanExtra 0.8.0 2024-05-19 [1] CRAN (R 4.5.2)
xfun 0.57 2026-03-20 [1] CRAN (R 4.5.2)
xml2 1.5.2 2026-01-17 [1] CRAN (R 4.5.2)
yaml 2.3.12 2025-12-10 [1] CRAN (R 4.5.2)
[1] /home/m/R/x86_64-pc-linux-gnu-library/4.5
[2] /usr/local/lib/R/site-library
[3] /usr/lib/R/site-library
[4] /usr/lib/R/library
* ── Packages attached to the search path.
──────────────────────────────────────────────────────────────────────────────