First install/load packages. The following code will load packages and install those that have not been previously installed:
Set parameters and variable names for later use:
# define number of iterations for MCMCglmm models
itrns <- 60000
# parameter names for response calls
rsp.prms <- c("mean_freq", "entropy", "skewness", "num_syllables", "syll_duration", "syll_rate", "PC1_consistency")
# parameter names for response calls
inq.prms <- c("call_duration", "mean_freq", "entropy", "freq_range", "modulation_index", "freq_slope")
# color palette for plots
cols <- inferno(10, alpha = 0.6)
# for ggplot significance lines
dif.range <- function(x, na.rm = TRUE) max(x, na.rm = na.rm) - min(x, na.rm = na.rm)
# ggplot2 theme
theme_set(theme_classic(base_size = 14))
# position of points in ggplot2
pos <- position_dodge(width= 0.5)
# repetitions for classification routine
reps <- 100
Read “extended selection tables” (RDS files) containing both metadata and acoustic data for inquiry and response calls and measure spectrographic parameters using specan()
function from warbleR
package:
# inquiry calls extended selection table
est.inq <- readRDS("~/Dropbox/Projects/Ontogenia respuesta Thyroptera/suppl_mat/ext_sel_tab_inquiry.RDS")
# response calls extended selection table
est.rsp <- readRDS("~/Dropbox/Projects/Ontogenia respuesta Thyroptera/suppl_mat/ext_sel_tab_response.RDS")
# set global options for warbleR package functions
warbleR_options(parallel = parallel::detectCores() - 1, bp = "frange", fast = FALSE, threshold = 15, ovlp = 90)
# get spectral parameters for inquiry calls
sp.inq <- specan(est.inq, wl = 300)
# measure mel cepstral coef stats
mfcc.inq <- mfcc_stats(X = est.inq, wl = 300, nbands = 10)
# get spectral parameters for response calls
sp.rsp <- specan(X = est.rsp, wl = 600)
# measure mel cepstral coef stats
mfcc.rsp <- mfcc_stats(X = est.rsp, wl = 80, nbands = 10)
# remove columns that won't be used
sp.rsp$top.freq <- sp.rsp$bottom.freq <- sp.inq$top.freq <- sp.inq$bottom.freq <- NULL
# put together mfcc and spectral parameters
sp.inq <- merge(sp.inq, mfcc.inq, by = c("sound.files", "selec"))
# add metadata to measurements inquiry calls
sp.inq <- merge(sp.inq, est.inq[ ,c("sound.files", "selec", "sex", "age.class", "indiv", "age")], by = c("sound.files", "selec"))
# put together mfcc and spectral parameters
sp.rsp <- merge(sp.rsp, mfcc.rsp, by = c("sound.files", "selec"))
# add metadata to measurements response calls
sp.rsp <- merge(sp.rsp, est.rsp[ ,c("sound.files", "selec", "start", "end", "call", "sex", "age.class", "indiv", "age")], by = c("sound.files", "selec"))
# fix entropy standarizing by syllable duration
sp.inq$entropy <- lm(entropy ~ duration, data = sp.inq)$residuals
# fix entropy standarizing by syllable duration
sp.rsp$entropy[!is.na(sp.rsp$entropy)] <- lm(entropy ~ duration, data = sp.rsp[!is.na(sp.rsp$entropy), ])$residuals
# rename parameters to be used in stats
names(sp.inq)[names(sp.inq) %in% c("duration", "meanfreq", "entropy", "dfslope", "dfrange", "modindx")] <- inq.prms
Response calls measurements were made at the syllable level. Call level parameters must be measured:
# measure call parameters
sp.rsp.cll <- song_param(sp.rsp, song_colm = "call", sd = TRUE, mean_colm = c("duration", "meanfreq", "entropy", "modindx", "dfrange", "skew", grep("kurt.cc|mean.d|var.d1|median.c|min.cc|max.cc|skew.cc", names(sp.rsp), value = TRUE)), na.rm = TRUE)
Add coefficient of variation for selected parameters and then calculate PCA on call consistency:
# calculate Coefficient of variation
sp.rsp.cll$cv.duration <- sp.rsp.cll$duration / sp.rsp.cll$sd.duration
sp.rsp.cll$cv.meanfreq <- sp.rsp.cll$meanfreq / sp.rsp.cll$sd.meanfreq
sp.rsp.cll$cv.entropy <- sp.rsp.cll$entropy / sp.rsp.cll$sd.entropy
sp.rsp.cll$cv.modindx <- sp.rsp.cll$modindx / sp.rsp.cll$sd.modindx
sp.rsp.cll$cv.skew <- sp.rsp.cll$skew / sp.rsp.cll$sd.skew
sp.rsp.cll$cv.dfrange <- sp.rsp.cll$dfrange / sp.rsp.cll$sd.dfrange
sp.rsp.cll$cv.gap.duration <- sp.rsp.cll$gap.duration / sp.rsp.cll$sd.gap.duration
# add metadata to call level parameters
sp.rsp.cll <- merge(sp.rsp.cll, sp.rsp[!duplicated(sp.rsp[, c("sound.files", "call")]), c("sound.files", "call", "indiv", "sex", "age", "age.class")], by = c("sound.files", "call"))
Calculate PCA for representing call consistency:
## run PCA on complete cases of CV variables
pca <- prcomp(sp.rsp.cll[complete.cases(sp.rsp.cll[,c("cv.duration", "cv.meanfreq", "cv.entropy", "cv.dfrange", "cv.skew", "cv.gap.duration")]), c("cv.duration", "cv.meanfreq", "cv.entropy", "cv.dfrange", "cv.skew", "cv.gap.duration")], scale. = TRUE)
# check variable contribution to PCs
pca.contr <- data.frame(PC = paste0("PC", 1:6), t(summary(pca)$importance))
ggplot(pca.contr, aes(x = PC, y = Cumulative.Proportion)) +
geom_col(fill = cols[6]) + ylab("Cumulative contribution") +
geom_text(aes(x=PC,y=Cumulative.Proportion,label= round(Cumulative.Proportion, 2)),vjust=0)
Check variable contribution to PCs and save PC1:
# Add first PC to call level parameters
sp.rsp.cll$PC1 <- NA
sp.rsp.cll$PC1[complete.cases(sp.rsp.cll[,c("cv.duration", "cv.meanfreq", "cv.entropy", "cv.dfrange", "cv.skew", "cv.gap.duration")])] <- pca$x[, "PC1"]
# check PCA loadings
loads <- data.frame(PC = rep(paste0("PC", 1:6), each = 6), var = rep(c("CV_duration", "CV_mean_freq", "CV_entropy", "CV_freq_range", "CV_skewness", "CV_gap_duration"), 6), score = c(pca$rotation))
ggplot(loads, aes(x = var, y = score)) +
geom_col(fill = cols[8]) +
facet_wrap( ~ PC, nrow = 3) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
Rename response call parameters:
names(sp.rsp.cll)[names(sp.rsp.cll) %in% c("meanfreq", "entropy", "skew", "num.elms", "elm.duration", "song.rate", "PC1")] <- rsp.prms
Correlation on selected acoustic parameters for response calls:
#correlation among variables
cm <- cor(sp.rsp.cll[, rsp.prms], use = "pairwise.complete.obs")
corrplot.mixed(corr = cm, order = "hclust", tl.pos = "lt", upper = "ellipse")
Correlation on selected acoustic parameters for inquiry calls:
#correlation among variables
cm <- cor(sp.inq[, inq.prms], use = "pairwise.complete.obs")
corrplot.mixed(corr = cm, order = "hclust", tl.pos = "lt", upper = "ellipse")
Youngest age at which RESPONSE calls were produced:
round(mean(tapply(sp.rsp.cll$age[sp.rsp.cll$sex == "M" & sp.rsp.cll$age.class == "juveniles"], sp.rsp.cll$indiv[sp.rsp.cll$sex == "M" & sp.rsp.cll$age.class == "juveniles"], min, na.rm = TRUE), na.rm = TRUE), 2)
## [1] 25.08
round(mean(tapply(sp.rsp.cll$age[sp.rsp.cll$sex == "F" & sp.rsp.cll$age.class == "juveniles"], sp.rsp.cll$indiv[sp.rsp.cll$sex == "F" & sp.rsp.cll$age.class == "juveniles"], min, na.rm = TRUE), na.rm = TRUE), 2)
## [1] 26.77
round(min(sp.rsp.cll$age[sp.rsp.cll$sex == "M" & sp.rsp.cll$age.class == "juveniles"]), 2)
## [1] 6.62
round(min(sp.rsp.cll$age[sp.rsp.cll$sex == "F" & sp.rsp.cll$age.class == "juveniles"]), 2)
## [1] 4.29
Youngest age at which INQUIRY calls were produced (only for males as females were not aged - no forearm information):
m.age.inq.male <- tapply(sp.inq$age[sp.inq$sex == "M" & sp.inq$age.class == "juveniles"], sp.inq$indiv[sp.inq$sex == "M" & sp.inq$age.class == "juveniles"], min, na.rm = TRUE)
round(mean(m.age.inq.male[!is.infinite(m.age.inq.male)], na.rm = TRUE), 2)
## [1] 63.16
m.age.inq.male <- tapply(sp.inq$age[sp.inq$sex == "M" & sp.inq$age.class == "juveniles"], sp.inq$indiv[sp.inq$sex == "M" & sp.inq$age.class == "juveniles"], min, na.rm = TRUE)
round(min(m.age.inq.male[!is.infinite(m.age.inq.male)], na.rm = TRUE), 2)
## [1] 40.75
Bayesian MCMC generalized linear models on response call acoustic parameters with individual as a random effect and age or sex as predictors. 3 models compared for each acoustic parameter:
A loop is used to run these 3 models for each selected acoustic parameters. Parameters are scaled (i.e. z-transformed) to obtained standardized effect sizes (within the loop). Diagnostic plots for MCMC model performance are shown at the end of this report:
# Selecting between with and without interaction
mods.rsp.ontog <- pblapply(rsp.prms, function(x){
# choose complete cases and data for juveniles
X <- sp.rsp.cll[complete.cases(sp.rsp.cll[ , x]) & sp.rsp.cll$age.class == "juveniles", ]
# scale and center
X[, x] <- scale(X[, x])
m.age.sex <- MCMCglmm(formula(paste(x, " ~ age + sex + age:sex")), random = ~ indiv, data = X, verbose = FALSE, nitt = itrns)
m.age <- MCMCglmm(formula(paste(x, " ~ age")), random = ~ indiv, data = X, verbose = FALSE, nitt = itrns)
m.null <- MCMCglmm(formula(paste(x, " ~ 1")), random = ~ indiv, data = X, verbose = FALSE, nitt = itrns)
ms <- model.sel(m.age.sex, m.age, m.null, rank="DIC")
mds <- list(model.tab = ms, m.age.sex = m.age.sex, m.age = m.age, m.null = m.null)
return(mds)
})
names(mods.rsp.ontog) <- rsp.prms
mods.rsp.ontog
Model selection results (only models that improved model fit compared to the null models are evaluated):
[1] “mean_freq”X.Intercept. | age | sex | age.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.age.sex | -0.127 | 0.006 |
|
|
6 | -644 | 1311 | 0.00 | 0.573 |
m.null | 0.002 | NA | NA | NA | 3 | -646 | 1312 | 1.27 | 0.304 |
m.age | -0.036 | 0.001 | NA | NA | 4 | -646 | 1314 | 3.09 | 0.122 |
X.Intercept. | age | sex | age.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.age.sex | -0.690 | 0.023 |
|
|
6 | -747 | 1510 | 0.0 | 9.99e-01 |
m.age | -0.418 | 0.013 | NA | NA | 4 | -756 | 1524 | 14.2 | 8.11e-04 |
m.null | 0.029 | NA | NA | NA | 3 | -766 | 1546 | 35.7 | 1.74e-08 |
X.Intercept. | age | sex | age.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.age.sex | 0.737 | -0.020 |
|
|
6 | -722 | 1465 | 0.00 | 8.61e-01 |
m.age | 0.471 | -0.013 | NA | NA | 4 | -725 | 1469 | 3.64 | 1.39e-01 |
m.null | 0.004 | NA | NA | NA | 3 | -737 | 1493 | 27.79 | 7.96e-07 |
X.Intercept. | age | sex | age.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.age | -0.523 | 0.013 | NA | NA | 4 | -641 | 1303 | 0.000 | 5.94e-01 |
m.age.sex | -0.659 | 0.010 |
|
|
6 | -641 | 1304 | 0.763 | 4.06e-01 |
m.null | -0.081 | NA | NA | NA | 3 | -653 | 1327 | 23.495 | 4.70e-06 |
X.Intercept. | age | sex | age.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.age.sex | 0.664 | -0.026 |
|
|
6 | -650 | 1320 | 0.00 | 6.37e-01 |
m.age | 0.762 | -0.023 | NA | NA | 4 | -651 | 1321 | 1.13 | 3.63e-01 |
m.null | -0.027 | NA | NA | NA | 3 | -685 | 1391 | 70.55 | 3.05e-16 |
X.Intercept. | age | sex | age.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.age.sex | -0.583 | 0.013 |
|
|
6 | -656 | 1332 | 0.00 | 6.14e-01 |
m.age | -0.697 | 0.019 | NA | NA | 4 | -657 | 1333 | 0.93 | 3.86e-01 |
m.null | -0.033 | NA | NA | NA | 3 | -683 | 1385 | 52.37 | 2.61e-12 |
X.Intercept. | age | sex | age.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.age.sex | -0.907 | 0.019 |
|
|
6 | -580 | 1181 | 0.00 | 8.99e-01 |
m.age | -0.422 | 0.011 | NA | NA | 4 | -583 | 1186 | 4.37 | 1.01e-01 |
m.null | -0.032 | NA | NA | NA | 3 | -592 | 1202 | 21.00 | 2.48e-05 |
Select the best model for each parameter and extract effect sizes:
# select best models based on BIC
best.mods.rsp.ontog <- pblapply(mods.rsp.ontog, function(X){
# if best model was at least 2 BIC units higher than null
if (X[[1]]["m.null", "delta"] > 2) return(X[[rownames(X[[1]])[1]]]) else
return(NA) # else if models were as good as null model return NA
})
# rename
names(best.mods.rsp.ontog) <- names(mods.rsp.ontog)
# remove the NA ones (the ones in which the null model was the best)
best.mods.rsp.ontog <- best.mods.rsp.ontog[sapply(best.mods.rsp.ontog, class) == "MCMCglmm"]
# extract fixed effect size
out <- lapply(1:length(best.mods.rsp.ontog), function(x){
# fixed effects
fe <- summary(best.mods.rsp.ontog[[x]])$solutions
# Confidence intervals
ci <- HPDinterval(best.mods.rsp.ontog[[x]]$Sol)
# sample sizes
obs <- sp.rsp.cll[complete.cases(sp.rsp.cll[ , names(best.mods.rsp.ontog)[x]]) & sp.rsp.cll$age.class == "juveniles", ]
# put results together in a data frame
res <- data.frame(response = names(best.mods.rsp.ontog)[x], predictor = rownames(ci)[2:nrow(ci)], effect_size = fe[-1, "post.mean"], CI_2.5 = ci[2:nrow(ci), 1], CI_97.5 = ci[2:nrow(ci), 2], pMCMC = fe[-1, "pMCMC"], stringsAsFactors = FALSE, n.obs = nrow(obs), n.indv = length(unique(obs$indiv)))
return(res)
})
# put effect sizes in a single data frame
es.rsp.ontog <- do.call(rbind, out)
rownames(es.rsp.ontog) <- 1:nrow(es.rsp.ontog)
response | predictor | effect_size | CI_2.5 | CI_97.5 | pMCMC | n.obs | n.indv | |
---|---|---|---|---|---|---|---|---|
1 | entropy | age | 0.023 | 0.015 | 0.031 | 2e-04 | 548 | 21 |
3 | entropy | age:sexM | -0.016 | -0.027 | -0.006 | 0.0021 | 548 | 21 |
4 | skewness | age | -0.020 | -0.028 | -0.012 | 2e-04 | 548 | 21 |
6 | skewness | age:sexM | 0.011 | 0.000 | 0.021 | 0.0351 | 548 | 21 |
7 | num_syllables | age | 0.013 | 0.008 | 0.017 | 2e-04 | 548 | 21 |
8 | syll_duration | age | -0.026 | -0.034 | -0.020 | 2e-04 | 548 | 21 |
10 | syll_duration | age:sexM | 0.006 | -0.003 | 0.015 | 0.1667 | 548 | 21 |
11 | syll_rate | age | 0.013 | 0.006 | 0.021 | 7e-04 | 541 | 21 |
13 | syll_rate | age:sexM | 0.010 | 0.001 | 0.020 | 0.0365 | 541 | 21 |
14 | PC1_consistency | age | 0.019 | 0.011 | 0.027 | 2e-04 | 475 | 21 |
16 | PC1_consistency | age:sexM | -0.013 | -0.024 | -0.002 | 0.0218 | 475 | 21 |
Plot effect sizes of age by response variable:
ggplot(es.rsp.ontog[es.rsp.ontog$predictor == "age", ], aes(x = response, y = effect_size)) +
geom_hline(yintercept = 0, lty = 2) +
geom_point(col = cols[7], size = 2.5) +
geom_errorbar(aes(ymin=CI_2.5, ymax=CI_97.5), width=.05, col = cols[7], size = 1.2) +
coord_flip()
Plot effect sizes of age-sex interaction by response variable (showing the difference in slope of males compare to females):
ggplot(es.rsp.ontog[es.rsp.ontog$predictor == "age:sexM", ], aes(x = response, y = effect_size)) +
geom_hline(yintercept = 0, lty = 2) +
geom_point(col = cols[7], size = 2.5) +
geom_errorbar(aes(ymin=CI_2.5, ymax=CI_97.5), width=.05, col = cols[7], size = 1.2) +
coord_flip()
Scatter plots with original units:
# subset response call parameters for juveniles
sp.rsp.cll.j <- melt(sp.rsp.cll[sp.rsp.cll$age.class == "juveniles", c(rsp.prms, "sex", "age")], id.vars=c("sex", "age"))
# convert to ms
sp.rsp.cll.j$value[sp.rsp.cll.j$variable == "syll_duration"] <- sp.rsp.cll.j$value[sp.rsp.cll.j$variable == "syll_duration"] * 1000
# Plot relationship from above model
ggplot(sp.rsp.cll.j, aes(x = age, y = value, color = sex)) +
geom_point(size = 2) +
labs(x = "Age (days)", y = NULL) +
geom_smooth(method = "lm") +
scale_colour_manual(name="Sex", values = cols[c(2, 7)]) +
facet_wrap(~ variable, ncol = 2, scales = "free_y", strip.position = "left") +
theme_classic(base_size = 16) +
theme(strip.background = element_blank(), strip.placement = "outside")
3 age classes
Longitudinal data (age.class == ‘juveniles’) was split in 4 age classes and the 2 extremes were used (pups and juveniles). Ages above 70 days old were excluded due to small sample sizes.
The following code organizes the data accordingly:
# remove juveniles order than 70 days
sub.sp.rsp.cll <- sp.rsp.cll[sp.rsp.cll$age < 70 | sp.rsp.cll$age.class == "adults", ]
# split juveniles in 4 age classes
sub.sp.rsp.cll$age.class[sub.sp.rsp.cll$age.class == "juveniles"] <- cut(sub.sp.rsp.cll$age[sub.sp.rsp.cll$age.class == "juveniles"], breaks = 4)
# remove intermediate classes
sub.sp.rsp.cll <- sub.sp.rsp.cll[sub.sp.rsp.cll$age.class %in% c(1, 4, "adults"), ]
# convert classes to labels
sub.sp.rsp.cll$age.class <- ifelse(sub.sp.rsp.cll$age.class == 1, "pups", ifelse(sub.sp.rsp.cll$age.class == 4, "juveniles", "adults"))
Again, Bayesian MCMC generalized linear models on response call acoustic parameters with individual as a random effect and age class as predictor. 2 models compared for each acoustic parameter:
age class as single predictor: \[Acoustic\ parameter \sim age.class + (1 | indiv)\]
Null model with no predictor: \[Acoustic\ parameter \sim 1 + (1 | indiv)\]
A loop is used to run these 2 models for each selected acoustic parameters. Parameters are scaled (i.e. z-transformed) to obtained standardized effect sizes (within the loop). Diagnostic plots for MCMC model performance are shown at the end of this report:
\[Acoustic\ parameter \sim age.class + age + (1 | indiv)\]
# adults vs juveniles and pups
mds.age.clss.rsp <- pblapply(rsp.prms, function(x){
# choose complete cases and data for juveniles
X <- sub.sp.rsp.cll[complete.cases(sub.sp.rsp.cll[ , x]), ]
# scale and center
X[, x] <- scale(X[, x])
# model age
m.age <- MCMCglmm(formula(paste(x, " ~ age.class")), random = ~ indiv, data = X, verbose = FALSE, nitt = itrns)
# null model
m.null <- MCMCglmm(formula(paste(x, " ~ 1")), random = ~ indiv, data = X, verbose = FALSE, nitt = itrns)
# put together models
ms <- model.sel(m.age, m.null, rank = "DIC")
# save models in a list
res <- list(model.tab = ms, m.age = m.age, m.null = m.null)
return(res)
})
# add parameter name to models
names(mds.age.clss.rsp) <- rsp.prms
Model selection results (only models that improved model fit compared to the null models are evaluated):
[1] “mean_freq”X.Intercept. | age.class | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|
m.age | 0.007 |
|
5 | -270 | 569 | 0.0 | 1.00e+00 |
m.null | -0.151 | NA | 3 | -287 | 600 | 31.9 | 1.19e-07 |
X.Intercept. | age.class | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|
m.age | 0.269 |
|
5 | -348 | 720 | 0.0 | 0.99575 |
m.null | 0.046 | NA | 3 | -354 | 731 | 10.9 | 0.00425 |
X.Intercept. | age.class | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|
m.age | 0.044 |
|
5 | -368 | 757 | 0.0 | 1.00e+00 |
m.null | 0.025 | NA | 3 | -383 | 784 | 27.2 | 1.24e-06 |
X.Intercept. | age.class | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|
m.age | 0.510 |
|
5 | -328 | 678 | 0.00 | 0.793 |
m.null | -0.012 | NA | 3 | -328 | 681 | 2.69 | 0.207 |
X.Intercept. | age.class | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|
m.age | 0.076 |
|
5 | -288 | 600 | 0.0 | 1.00e+00 |
m.null | -0.075 | NA | 3 | -309 | 643 | 42.8 | 4.98e-10 |
X.Intercept. | age.class | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|
m.age | 0.794 |
|
5 | -240 | 504 | 0.0 | 0.9986 |
m.null | 0.063 | NA | 3 | -245 | 518 | 13.1 | 0.0014 |
X.Intercept. | age.class | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|
m.age | 0.408 |
|
5 | -308 | 639 | 0.00 | 0.99095 |
m.null | -0.035 | NA | 3 | -313 | 648 | 9.39 | 0.00905 |
Select the best model for each parameter and extract effect sizes:
# select best models based on BIC
best.mods.rsp.age.clss <- pblapply(mds.age.clss.rsp, function(X){
# if best model was at least 2 BIC units higher than null
if (X[[1]]["m.null", "delta"] > 2) return(X[[rownames(X[[1]])[1]]]) else
return(NA) # else if models were as good as null model return NA
})
# rename
names(best.mods.rsp.age.clss) <- rsp.prms
# remove the NA ones (the ones in which the null model was the best)
best.mods.rsp.age.clss <- best.mods.rsp.age.clss[sapply(best.mods.rsp.age.clss, class) == "MCMCglmm"]
# extract fixed effect size
out <- lapply(1:length(best.mods.rsp.age.clss), function(x){
# fixed effects
fe <- summary(best.mods.rsp.age.clss[[x]])$solutions
# Confidence intervals
ci <- HPDinterval(best.mods.rsp.age.clss[[x]]$Sol)
# sample sizes
obs <- sub.sp.rsp.cll[complete.cases(sub.sp.rsp.cll[ , names(best.mods.rsp.age.clss)[x]]), ]
# put results together in a data frame
res <- data.frame(response = names(best.mods.rsp.age.clss)[x], predictor = rownames(ci)[2:nrow(ci)], effect_size = fe[-1, "post.mean"], CI_2.5 = ci[2:nrow(ci), 1], CI_97.5 = ci[2:nrow(ci), 2], pMCMC = fe[-1, "pMCMC"], stringsAsFactors = FALSE, n.obs = nrow(obs), n.indv = length(unique(obs$indiv)))
return(res)
})
# put effect sizes in a single data frame
es.rsp.age.clss <- do.call(rbind, out)
rownames(es.rsp.age.clss) <- 1:nrow(es.rsp.age.clss)
# rename effects
es.rsp.age.clss$predictor <- gsub("age.classpups", "Pups_vs_adults", es.rsp.age.clss$predictor)
es.rsp.age.clss$predictor <- gsub("age.classjuveniles", "Juveniles_vs_adults", es.rsp.age.clss$predictor)
response | predictor | effect_size | CI_2.5 | CI_97.5 | pMCMC | n.obs | n.indv | |
---|---|---|---|---|---|---|---|---|
1 | mean_freq | Juveniles_vs_adults | 0.251 | -0.457 | 1.024 | 0.5144 | 286 | 29 |
2 | mean_freq | Pups_vs_adults | -0.865 | -1.610 | -0.104 | 0.0291 | 286 | 29 |
3 | entropy | Juveniles_vs_adults | 0.061 | -0.459 | 0.617 | 0.8337 | 285 | 29 |
4 | entropy | Pups_vs_adults | -0.856 | -1.385 | -0.296 | 0.0025 | 285 | 29 |
5 | skewness | Juveniles_vs_adults | -0.599 | -1.111 | -0.070 | 0.0249 | 285 | 29 |
6 | skewness | Pups_vs_adults | 0.531 | 0.024 | 1.089 | 0.0449 | 285 | 29 |
7 | num_syllables | Juveniles_vs_adults | -0.650 | -1.082 | -0.223 | 0.0032 | 286 | 29 |
8 | num_syllables | Pups_vs_adults | -1.095 | -1.509 | -0.676 | 2e-04 | 286 | 29 |
9 | syll_duration | Juveniles_vs_adults | -0.977 | -1.430 | -0.525 | 2e-04 | 286 | 29 |
10 | syll_duration | Pups_vs_adults | 0.505 | 0.048 | 0.969 | 0.0309 | 286 | 29 |
11 | syll_rate | Juveniles_vs_adults | -0.903 | -1.314 | -0.481 | 2e-04 | 281 | 29 |
12 | syll_rate | Pups_vs_adults | -1.665 | -2.080 | -1.260 | 2e-04 | 281 | 29 |
13 | PC1_consistency | Juveniles_vs_adults | -0.447 | -0.988 | 0.127 | 0.1172 | 249 | 29 |
14 | PC1_consistency | Pups_vs_adults | -1.061 | -1.603 | -0.500 | 4e-04 | 249 | 29 |
Juveniles and pups vs adults (graphs represent the difference with regard to the adult values):
ggplot(es.rsp.age.clss[es.rsp.age.clss$predictor %in% c("Juveniles_vs_adults", "Pups_vs_adults"), ], aes(x = response, y = effect_size, col = predictor)) +
geom_point(size = 2.5, position = pos) +
geom_hline(yintercept = 0, lty = 2) +
geom_errorbar(aes(ymin=CI_2.5, ymax=CI_97.5), width=.05, size = 1.2, position = pos) +
scale_color_manual(values= cols[c(2, 7)]) +
coord_flip()
Mean and violin plots for response variables: (P ≤ 0.05 = **; P ≤ 0.01 = ***; P ≤ 0.001 ****)
# put data in long format for ggplot
rsh.sp.rsp.cll <- melt(sub.sp.rsp.cll[, c(unique(es.rsp.age.clss$response), "age.class")], id.vars=c("age.class"))
#remove not signf
rsh.sp.rsp.cll$age.class <- factor(rsh.sp.rsp.cll$age.class, levels = c("pups", "juveniles", "adults"))
# out2 <- rsh.sp.rsp.cll
# out2 <- out2[out2$variable %in% es.rsp.age.clss$response, ]
# set geom_line gpplot values for adding significance brakets and asterisks
es <- es.rsp.age.clss[es.rsp.age.clss$pMCMC < 0.05, ]
es$label <- ifelse(es$pMCMC < 0.001, "***", ifelse(es$pMCMC < 0.01, "**", "*"))
names(es)[1] <- "variable"
es$x <- ifelse(es$predictor == "Pups_vs_adults", 2, 2.5)
es$sd <- sapply(es$variable, function(x) sd(rsh.sp.rsp.cll$value[rsh.sp.rsp.cll$variable == x], na.rm = TRUE))
es$max <- sapply(es$variable, function(x) max(rsh.sp.rsp.cll$value[rsh.sp.rsp.cll$variable == x], na.rm = TRUE))
# position of black asteriks if vs pups add sd*0.25 to max, if vs juvs 0.75
es$y <- ifelse(es$predictor != "Pups_vs_adults", es$max + (es$sd * 0.45), es$max + (es$sd * 1.3))
# line position in y axis
es$y2 <- es$y - es$sd * 0.08
# length of vertical lines in brakets
es$y3 <- es$y2 - sapply(es$variable, function(x) dif.range(rsh.sp.rsp.cll$value[rsh.sp.rsp.cll$variable == x]) * 0.025)
# y position of white asteriks
es$y4 <- es$y + es$sd * 0.5
# start and end of lines
es$x2 <- ifelse(es$x == 2, 1, 2)
es$x3 <- 3
# create segment data for each comparison
data.segm <- lapply(1:nrow(es), function(x)
data.frame(x = c(es$x2[x], es$x2[x], es$x3[x]), y = rep(es$y2[x], 3), xend = c(es$x3[x], es$x2[x], es$x3[x]), yend = c(es$y2[x],es$y3[x], es$y3[x]), variable = es$variable[x]))
# get mean by variable and age class
mean.rsp.cll <- aggregate(value ~ variable + age.class, data= rsh.sp.rsp.cll, FUN = mean, na.rm = TRUE)
# create plot without significance brakets
p <- ggplot(rsh.sp.rsp.cll, aes(age.class, value)) +
geom_point(size = 2.5, color = cols[1], data = mean.rsp.cll) +
geom_text(col = "white", size = 9, data = es, mapping = aes(x = x, y = y4, label = label)) +
geom_text(col = "black", size = 9, data = es, mapping = aes(x = x, y = y, label = label)) +
geom_violin(fill = cols[8]) +
facet_wrap(~ variable, scales = "free_y", ncol = 2, strip.position = "left") +
theme(strip.background = element_blank(), strip.placement = "outside") +
labs(y = NULL, x = "Age class")
# add signfincance brakets
for(i in 1:length(data.segm))
p <- p + geom_segment(data=data.segm[[i]], lineend = "square", aes(x=x,y=y,yend=yend,xend=xend),inherit.aes=FALSE)
# plot
p
Range of ages for juveniles between 1 and 70 days old split in 4 periods (after 70 days the sample size drops)
Adults included as a single category
Same number of individuals per period (6, those with highest n) and same number of calls per individual (6 calls). This simplify the comparison of classification accuracy across segments
Calls within individual were randomly sub-sampled and the process was iterated 100 times
Mean and SD values are calculated from the 100 replicates
Random Forest accuracy was measured with cross-validation (out-of-bag error) in which each case is classified with a model that did not include that case for training
# rename age classes splitting in 4 breaks
sp.rsp.cll$age.class[which(sp.rsp.cll$age < 70)] <- as.character(cut(sp.rsp.cll$age[which(sp.rsp.cll$age < 70)], breaks = 4))
# remove other juveniles
sp.rsp.cll <- sp.rsp.cll[sp.rsp.cll$age.class != "juveniles", ]
# Set it as factor
sp.rsp.cll$age.class <- as.factor(sp.rsp.cll$age.class)
# ALL PARAMETERS (AP)
indv.prms <- c("syll_duration","mean_freq", "entropy", "modindx", "dfrange", "skewness", "num_syllables", "song.duration", "syll_rate", "gap.duration", "sd.gap.duration", "sd.elm.duration", grep("kurt.cc|mean.d|var.d1|median.c|min.cc|max.cc|skew.cc", names(sp.rsp.cll), value = TRUE))
# preprocess to scale and remove colinear parameters
pp.indv <- preProcess(sp.rsp.cll[, indv.prms[-1]], c("center", "scale", "corr"), cutoff = 0.7)
# save processed parameters
sp.rsp.cll.indv <- predict(pp.indv, sp.rsp.cll)
# remove not used parameters
sp.rsp.cll.indv <- sp.rsp.cll.indv[ , names(sp.rsp.cll.indv) %in% c("indiv", "age.class", indv.prms)]
# split and prepare data by age class
dats <- lapply(unique(sp.rsp.cll.indv$age.class), function(x){
# isolate data for that age class
X <- sp.rsp.cll.indv[sp.rsp.cll.indv$age.class == x, ]
# get only complete cases
X <- X[complete.cases(X), ]
# select 6 with highest sample size
tab <- table(X$indiv)
X <- X[X$indiv %in% names(tab)[order(-tab)][1:6], ]
# add age class column
X$age.class <- x
return(X)
})
# minimum sample size per individual
min.n <- min(sapply(dats, function(x) table(x$indiv)))
## run RF 100 times
class.res <- replicate(100, {out <- lapply(dats, function(X)
{
# selec min.n samples per individual
X <- X[rownames(X) %in% unlist(tapply(rownames(X), X$indiv, sample, min.n)), ]
# save age class for output data frame
age.class <- X$age.class[1]
# remove age class column
X$age.class <- NULL
# run random forest
rf <- ranger(indiv ~ ., data = X, num.trees = 10000, importance = "impurity")
rf.error <- rf$prediction.error
return(data.frame(age.class, sample.size = nrow(X), individuals = length(unique(X$indiv)), method = "RandomForest", cv.error = rf.error))
})
do.call(rbind, out)
}, simplify = FALSE)
class.res <- do.call(rbind, class.res)
class.agg <- aggregate(cv.error ~ age.class + sample.size + individuals, data = class.res, mean)
class.agg$sd <- aggregate(cv.error ~ age.class + sample.size + individuals, data = class.res, sd)$cv.error
class.agg$age.class <- factor(class.agg$age.class, levels = unique(class.agg$age.class)[c(3, 1, 2, 4, 5)])
ggplot(class.agg, aes(x = age.class, y = cv.error)) +
geom_point(size = 2.5, position = position_dodge(width = 1), col = cols[7]) +
geom_errorbar(aes(ymin = cv.error - sd, ymax = cv.error + sd), width=.05, size = 1.2, position = position_dodge(width = 1), col = cols[7]) +
geom_hline(yintercept = 5/6, lty = 3) +
annotate("text", x = 3, y = 0.8, label = "Error expected by chance") +
labs(x = "Age class (days)", y = "Classification error (CV)")
class.agg
age.class | sample.size | individuals | cv.error |
---|---|---|---|
(19.5,34.7] | 36 | 6 | 0.354 |
(34.7,49.9] | 36 | 6 | 0.515 |
(4.23,19.5] | 36 | 6 | 0.212 |
(49.9,65.2] | 36 | 6 | 0.378 |
adults | 36 | 6 | 0.401 |
Range of ages for juveniles between 1 and 70 days old split in 4 periods (after 70 days the sample size drops)
Adults included as a single category
Same number of individuals per period (6, those with highest n) and same number of calls per individual (6 calls). This simplify the comparison of classification accuracy across segments
Calls within individual were randomly sub-sampled and the process was iterated 100 times
Mean and SD values are calculated from the 100 replicates
Random Forest accuracy was measured with cross-validation (out-of-bag error) in which each case is classified with a model that did not include that case for training
# save processed parameters
sp.rsp.cll.indv <- predict(pp.indv, sp.rsp.cll)
# remove not used parameters
sp.rsp.cll.indv <- sp.rsp.cll.indv[ , names(sp.rsp.cll.indv) %in% c("sex", "age.class", indv.prms)]
# split and prepare data by age class
dats <- lapply(unique(sp.rsp.cll.indv$age.class), function(x){
# isolate data for that age class
X <- sp.rsp.cll.indv[sp.rsp.cll.indv$age.class == x, ]
# get only complete cases
X <- X[complete.cases(X), ]
# select 6 with highest sample size
tab <- table(X$sex)
X <- X[X$sex %in% names(tab)[order(-tab)][1:6], ]
# add age class column
X$age.class <- x
return(X)
})
# minimum sample size per individual
min.n <- min(sapply(dats, function(x) table(x$sex)))
## run RF 100 times
class.res <- replicate(3, {out <- lapply(dats, function(X)
{
# selec min.n samples per sex
X <- X[rownames(X) %in% unlist(tapply(rownames(X), X$sex, sample, min.n)), ]
# save age class for output data frame
age.class <- X$age.class[1]
# remove age class column
X$age.class <- NULL
# run random forest
rf <- ranger(sex ~ ., data = X, num.trees = 10000, importance = "impurity")
rf.error <- rf$prediction.error
return(data.frame(age.class, sample.size = nrow(X), sex = length(unique(X$sex)), method = "RandomForest", cv.error = rf.error))
})
do.call(rbind, out)
}, simplify = FALSE)
class.res <- do.call(rbind, class.res)
class.agg.sex <- aggregate(cv.error ~ age.class + sample.size + sex, data = class.res, mean)
class.agg.sex$sd <- aggregate(cv.error ~ age.class + sample.size + sex, data = class.res, sd)$cv.error
class.agg.sex$age.class <- factor(class.agg.sex$age.class, levels = unique(class.agg$age.class)[c(3, 1, 2, 4, 5)])
ggplot(class.agg.sex, aes(x = age.class, y = cv.error)) +
geom_point(size = 2.5, position = position_dodge(width = 1), col = cols[7]) +
geom_errorbar(aes(ymin = cv.error - sd, ymax = cv.error + sd), width=.05, size = 1.2, position = position_dodge(width = 1), col = cols[7]) +
geom_hline(yintercept = 0.5, lty = 3) +
annotate("text", x = 3, y = 0.46, label = "Error expected by chance") +
labs(x = "Age class (days)", y = "Classification error (CV)")
class.agg.sex
age.class | sample.size | individuals | cv.error |
---|---|---|---|
(19.5,34.7] | 62 | 0 | 0.237 |
(34.7,49.9] | 62 | 0 | 0.167 |
(4.23,19.5] | 62 | 0 | 0.161 |
(49.9,65.2] | 62 | 0 | 0.188 |
adults | 62 | 0 | 0.204 |
Bayesian MCMC generalized linear models on response call acoustic parameters with individual as a random effect and age class or sex as predictors. 3 models compared for each acoustic parameter:
A loop is used to run these 3 models for each selected acoustic parameters. Parameters are scaled (i.e. z-transformed) to obtained standardized effect sizes (within the loop). Diagnostic plots for MCMC model performance are shown at the end of this report:
Select the best model for each parameter and extract effect sizes:
# get best model
best.mods.inq.age.class <- pblapply(mods.inq.age.class, function(X){
# if best model was at least 2 BIC units higher than null
if (X[[1]]["m.null", "delta"] > 2) return(X[[rownames(X[[1]])[1]]]) else
return(NA)
})
# names list objects
names(best.mods.inq.age.class) <- names(mods.inq.age.class)
# remove NAs
best.mods.inq.age.class <- best.mods.inq.age.class[sapply(best.mods.inq.age.class, class) == "MCMCglmm"]
# fixed effect size
out <- lapply(1:length(best.mods.inq.age.class), function(x){
# fixed effects
fe <- summary(best.mods.inq.age.class[[x]])$solutions
# confidence intervals
ci <- HPDinterval(best.mods.inq.age.class[[x]]$Sol)
# put together results
res <- data.frame(response = names(best.mods.inq.age.class)[x], predictor = rownames(ci)[2:nrow(ci)], effect_size = fe[-1, "post.mean"], CI_2.5 = ci[2:nrow(ci), 1], CI_97.5 = ci[2:nrow(ci), 2], pMCMC = fe[-1, "pMCMC"], stringsAsFactors = FALSE)
return(res)
})
# put all results together
es.inq.age.class <- do.call(rbind, out)
# rename rows
rownames(es.inq.age.class) <- 1:nrow(es.inq.age.class)
[1] “call_duration”
X.Intercept. | age.class | sex | age.class.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.age | -0.170 |
|
NA | NA | 4 | -289 | 603 | 0.000 | 0.5373 |
m.age.sex | -0.225 |
|
|
|
5 | -289 | 603 | 0.353 | 0.4504 |
m.null | -0.015 | NA | NA | NA | 3 | -293 | 611 | 7.560 | 0.0123 |
X.Intercept. | age.class | sex | age.class.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.age.sex | -0.194 |
|
|
|
5 | -283 | 591 | 0.000 | 0.52596 |
m.age | -0.012 |
|
NA | NA | 4 | -283 | 591 | 0.219 | 0.47149 |
m.null | 0.173 | NA | NA | NA | 3 | -288 | 602 | 10.663 | 0.00254 |
X.Intercept. | age.class | sex | age.class.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.null | 0.217 | NA | NA | NA | 3 | -437 | 892 | 0.000 | 0.416 |
m.age.sex | 0.060 |
|
|
|
5 | -437 | 892 | 0.626 | 0.304 |
m.age | 0.207 |
|
NA | NA | 4 | -437 | 892 | 0.790 | 0.280 |
X.Intercept. | age.class | sex | age.class.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.age | -0.020 |
|
NA | NA | 4 | -402 | 826 | 0.000 | 0.544 |
m.age.sex | 0.002 |
|
|
|
5 | -402 | 827 | 0.586 | 0.405 |
m.null | 0.077 | NA | NA | NA | 3 | -405 | 831 | 4.732 | 0.051 |
X.Intercept. | age.class | sex | age.class.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.null | -0.064 | NA | NA | NA | 3 | -392 | 806 | 0.000 | 0.482 |
m.age | 0.000 |
|
NA | NA | 4 | -393 | 807 | 0.921 | 0.304 |
m.age.sex | -0.021 |
|
|
|
5 | -393 | 808 | 1.630 | 0.213 |
X.Intercept. | age.class | sex | age.class.sex | df | logLik | DIC | delta | weight | |
---|---|---|---|---|---|---|---|---|---|
m.null | -0.102 | NA | NA | NA | 3 | -451 | 921 | 0.000 | 0.342 |
m.age | -0.070 |
|
NA | NA | 4 | -451 | 921 | 0.015 | 0.339 |
m.age.sex | -0.070 |
|
|
|
5 | -450 | 921 | 0.135 | 0.319 |
Inquiry call parameters of juveniles vs adults (graphs represent the difference with regard to the adult values):
Mean and violin plots for response variables:
# convert to ms
sp.inq$call_duration <- sp.inq$call_duration * 1000
# long format
l.sp.inq <- melt(sp.inq[, c(inq.prms, "sex", "age.class")], id.vars=c("sex", "age.class"))
es <- es.inq.age.class
es <- es[es$pMCMC < 0.05, ]
es$label <- ifelse(es$pMCMC < 0.001, "***", ifelse(es$pMCMC < 0.01, "**", "*"))
names(es)[1] <- "variable"
# set mid point for brakets
es$x <- 1.5
#get SD and max of value for each parameter
es$sd <- sapply(es$variable, function(x) sd(l.sp.inq$value[l.sp.inq$variable == x]))
es$max <- sapply(es$variable, function(x) max(l.sp.inq$value[l.sp.inq$variable == x]))
# position of black asteriks if vs pups add sd*0.25 to max, if vs juvs 0.75
es$y <- es$max + (es$sd * 0.45)
# line position in y axis
es$y2 <- es$y - es$sd * 0.08
# length of vertical lines in brakets
es$y3 <- es$y2 - sapply(es$variable, function(x) dif.range(l.sp.inq$value[l.sp.inq$variable == x]) * 0.025)
# y position of white asteriks
es$y4 <- es$y + es$sd * 0.5
# start and end of braket lines
es$x2 <- 1
es$x3 <- 2
# create segment data for each comparison
data.segm <- lapply(1:nrow(es), function(x)
data.frame(x = c(es$x2[x], es$x2[x], es$x3[x]), y = rep(es$y2[x], 3), xend = c(es$x3[x], es$x2[x], es$x3[x]), yend = c(es$y2[x],es$y3[x], es$y3[x]), variable = es$variable[x]))
agg.inq.prms <- aggregate(value ~ variable + age.class, data = l.sp.inq, mean)
p <- ggplot(l.sp.inq, aes(age.class, value)) +
geom_point(size = 2.5, color = cols[1], data = agg.inq.prms) +
geom_text(col = "white", size = 9, data = es, mapping = aes(x = x, y = y4, label = label)) +
geom_text(col = "black", size = 9, data = es, mapping = aes(x = x, y = y, label = label)) +
geom_violin(fill = cols[8]) +
facet_wrap(~ variable, scales = "free_y", ncol = 2, strip.position = "left") +
theme(strip.background = element_blank(), strip.placement = "outside") +
labs(y = NULL, x = "Age class")
# addd brakets and asteriks
for(i in 1:length(data.segm))
p <- p + geom_segment(data=data.segm[[i]], lineend = "square", aes(x=x,y=y,yend=yend,xend=xend),inherit.aes=FALSE)
p
Juveniles did not have enough samples so we used same number of individuals in both age classes (5) and same number of calls per individual as in juveniles (2, 2, 10, 10 and 10 calls)
Calls within individual with more than 2 calls were randomly sub-sampled and the process was iterated 100 times
Mean and SD values are calculated from the 100 replicates
Random Forest accuracy was measured with cross-validation (out-of-bag error) in which each case is classified with a model that did not include that case for training
# parameters to be used
indv.inq.prms <- c("call_duration", "mean_freq", "sd", "freq.median", "freq.Q25", "freq.Q75", "freq.IQR", "time.median", "time.Q25", "time.Q75", "time.IQR", "skew", "kurt", "sp.ent", "time.ent", "entropy", "sfm", "meandom", "mindom", "maxdom", "freq_slope", "freq_range", "startdom", "enddom", "modulation_index", "peakf", grep("kurt.cc|mean.d|var.d1|median.c|min.cc|max.cc|skew.cc", names(sp.inq), value = TRUE))
pp <- preProcess(sp.inq[, indv.inq.prms], method = c("scale", "center"))
sp.inq.indv <- predict(pp, sp.inq)
# remove not used parameters
sp.inq.indv <- sp.inq.indv[ , c("indiv", "age.class", indv.inq.prms)]
##### on all original acoustic parameters #######
dats.indv <- lapply(na.omit(unique(sp.inq.indv$age.class)), function(x){
X <- sp.inq.indv[which(sp.inq.indv$age.class == x), ]
X <- X[complete.cases(X), ]
# select 6 with highest sample size
tab <- table(X$indiv)
X <- X[X$indiv %in% names(tab)[order(-tab)][1:6], ]
X$age.class <- x
return(X)
})
# minimum sample size per individual
# sapply(dats.indv, function(x) (table(x$indiv)))
# number of juveniles
table(dats.indv[[2]]$indiv)
# minimum number of samples for 5 individuals
min.ns <- c(2, 2, 10, 10, 10)
# replicate 100 times and calculate classification error
inq.classf <- replicate(reps, {
out <- lapply(dats.indv, function(X)
{
# split by individual
Y <- split(X, f = X$indiv)
if (X$age.class[1] == "juveniles")
# select sample sizes
Z <- lapply(Y, function(x){
if (nrow(x) == 2) return(x) else
return(x[sample(1:nrow(x), 10), ])
}) else
{
Y<- Y[sample(1:length(Y), 5)]
Z <- lapply(1:length(Y), function(y){
W <- Y[[y]]
W <- W[sample(1:nrow(W), min.ns[y]), ]
})
}
W <- do.call(rbind, Z)
# remove age class
age.class <- X$age.class[1]
W$age.class <- NULL
# run random forest
rf <- ranger(indiv ~ ., data = W, num.trees = 10000, importance = "impurity")
rf.error <- rf$prediction.error
return(data.frame(age.class, sample.size = nrow(W), individuals = length(unique(W$indiv)), method = "RandomForest", cv.error = rf.error))
})
do.call(rbind, out)
}, simplify = FALSE)
inq.classf <- do.call(rbind, inq.classf)
#aggregate mean and sd results
agg.inq.classf <- aggregate(cv.error ~ age.class + sample.size + individuals, data = inq.classf, mean)
agg.inq.classf$sd <- aggregate(cv.error ~ age.class + sample.size + individuals, data = inq.classf, sd)$cv.error
Plot mean and SD of classification error (out-of-bag error for Random Forest):
agg.inq.classf$age.class <- factor(agg.inq.classf$age.class, levels = c("juveniles", "adults"))
ggplot(agg.inq.classf, aes(x = age.class, y = cv.error)) +
geom_point(size = 2.5, position = position_dodge(width = 1), col = cols[8]) +
geom_errorbar(aes(ymin = cv.error - sd, ymax = cv.error + sd), width=.05, size = 1.2, position = position_dodge(width = 1), col = cols[8]) +
geom_hline(yintercept = 0.734, lty = 3) +
annotate("text", x = 1.5, y = 0.7, label = "Error expected by chance") +
labs(x = "Age class (days)", y = "Classification error (CV)")
Include MCMC chain trace and autocorrelation plots:
Response call ontogeny
## [1] "entropy"
## [1] "skewness"
## [1] "num_syllables"
## [1] "syll_duration"
## [1] "syll_rate"
## [1] "PC1_consistency"
Response call pups & juveniles vs adults
## [1] "mean_freq"
## [1] "entropy"
## [1] "skewness"
## [1] "num_syllables"
## [1] "syll_duration"
## [1] "syll_rate"
## [1] "PC1_consistency"
Inquiry call juveniles vs adults
## [1] "call_duration"
## [1] "mean_freq"
## [1] "freq_range"