Load packages
## add 'developer' to packages to be installed from github
x <- c("data.table", "lubridate", "devtools", "maRce10/warbleR", "readxl", "ranger", "caret", "e1071", "pbapply", "viridis", "ggplot2", "kableExtra", "rlang", "Sim.DiffProc", "soundgen")
aa <- lapply(x, function(y) {
# get pakage name
pkg <- strsplit(y, "/")[[1]]
pkg <- pkg[length(pkg)]
# check if installed, if not then install
if (!pkg %in% installed.packages()[,"Package"]) {
if (grepl("/", y)) devtools::install_github(y, force = TRUE) else
install.packages(y)
}
# load package
a <- try(require(pkg, character.only = T), silent = T)
if (!a) remove.packages(pkg)
})
Functions and global parameters
warbleR_options(wl = 300, parallel = 1, bp = c(0.5, 8), fast = TRUE, threshold = 15, ovlp = 20)
opts_knit$set(root.dir = "..")
# set evaluation false
opts_chunk$set( fig.width = 6, fig.height = 3, eval = FALSE, warning = FALSE, message = FALSE, tidy = TRUE)
num.trees <- 10000
Read data from Keen et al 2021
# read ext sel tab calls
labeled_calls <- readRDS("./data/raw/budgie call EST ms dataset.RDS")
# subset to unambiguous call types
labeled_calls <- labeled_calls[labeled_calls$Call.Type %in% grep("2", unique(labeled_calls$Call.Type),
invert = TRUE, value = TRUE), ]
# resample to 22.05 kHz
labeled_calls <- resample_est_waves(labeled_calls, samp.rate = 22.05, pb = FALSE)
df <- as.data.frame(table(labeled_calls$Call.Type))
names(df) <- c("Call_type", "Sample_size")
kb <- kable(df, row.names = FALSE)
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
print(kb)
Call_type
|
Sample_size
|
A
|
50
|
B
|
50
|
C
|
50
|
D
|
50
|
E
|
50
|
I
|
50
|
K
|
50
|
Measure acoustic parameters
# function to get acoustic parameters
acous_param_fun <- function(x) {
# measure acoustics parameters
sp <- spectro_analalysis(x, pb = FALSE, harmonicity = FALSE, threshold = 5)
# measure cepstral coeffs
cc <- mfcc_stats(x, pb = FALSE)[, -c(1, 2)]
# spectrographic cross correlation
spxc <- xcorr(x, pb = FALSE)
# MDS
spxc <- cmdscale(1 - spxc, k = 10, list. = TRUE)
spxc_mds <- spxc$points
colnames(spxc_mds) <- paste0("spxcMDS", 1:ncol(spxc_mds))
# mfcc cross correlation
mfccxc <- xcorr(x, pb = FALSE, type = "mfcc")
# MDS
mfccxc <- cmdscale(1 - mfccxc, k = 10, list. = TRUE)
mfxc_mds <- mfccxc$points
colnames(mfxc_mds) <- paste0("mfxcMDS", 1:ncol(mfxc_mds))
# dynamic time warping
fre_cntrs <- freq_ts(x, img = FALSE, pb = FALSE, threshold.time = 1)
# replace NAs with mean values for each column
if (anyNA(fre_cntrs))
for (i in 3:ncol(fre_cntrs)) fre_cntrs[is.na(fre_cntrs[, i]), i] <- mean(fre_cntrs[,
i], na.rm = TRUE)
dtw.dists <- freq_DTW(x, img = FALSE, pb = FALSE, threshold.time = 1, ts.df = fre_cntrs)
# MDS
dtw_mds <- cmdscale(dtw.dists, k = 10, list. = TRUE)$points
# fix colnames
colnames(dtw_mds) <- paste0("dtwMDS", 1:ncol(dtw_mds))
# put parameters in a list
all_params <- data.frame(sp, cc, dtw_mds, spxc_mds, mfxc_mds)
# scale for random forest
all_params[, -c(1, 2)] <- scale(all_params[, -c(1, 2)])
# add individual and experiment
all_params$call.type <- x$Call.Type[1]
return(all_params)
}
labeled_calls_l <- split(labeled_calls, labeled_calls$Call.Type)
# loop to measure acoustic parameters on each group
acous_param_l <- lapply(labeled_calls_l, FUN = function(x) try(acous_param_fun(x),
silent = TRUE))
acous_param <- do.call(rbind, acous_param_l)
names(acous_param)[(sapply(acous_param, anyNA))]
# save as RDS
saveRDS(acous_param, "./data/processed/acoustic_parameters_test_for_best_parameter_combination.RDS")
Run random forest with different acoustic parameter subsets
# read acoustic parameter data
acous_param <- readRDS("./data/processed/acoustic_parameters_test_for_best_parameter_combination.RDS")
# exclude groups in which the solo flight audio is uncertain for some
# individuals
# which parameters would be measured
param_categories <- c("mfcc", "spxc", "mfxc", "dtw", "sp")
# get actual parameter names
col_names <- names(acous_param)
col_names <- col_names[!col_names %in% c("sound.files", "selec", "call.type")]
# measurement category for each measruremnt
clss_col_names <- col_names
# name it by measurement function
for (i in 1:length(param_categories)) clss_col_names[if (param_categories[i] != "sp") grepl(c("cc",
"spxc", "mfxc", "dtw")[i], col_names) else !grepl("cc|xc|dtw|indiv|sound.files|selec",
col_names)] <- param_categories[i]
# cbind(col_names, clss_col_names)
# all posible combinations
combs4 <- combn(param_categories, 4)
combs3 <- combn(param_categories, 3)
combs2 <- combn(param_categories, 2)
# ake it a list
combs <- c(as.data.frame(combs4), as.data.frame(combs3), as.data.frame(combs2))
# add all 4 parameters as an option to the list
combs <- c(append(combs, list(param_categories)), param_categories)
combs <- sample(combs)
# loop
out <- pblapply(1:length(combs), cl = 3, function(i) {
# subset columns to keep only those from selected acoustic measurements
X <- acous_param[, c(col_names[clss_col_names %in% c(combs[[i]])], "call.type")]
# make it a factor for ranger to work
X$call.type <- as.factor(X$call.type)
# run RF model spectral and cepstral parameters
rfm <- ranger(call.type ~ ., data = X, num.trees = num.trees, importance = "impurity")
res <- data.frame(parameters = paste(combs[[i]], collapse = "-"), error = rfm$prediction.error)
return(res)
})
rf_perfomance_acous_params <- do.call(rbind, out)
rf_perfomance_acous_params <- rf_perfomance_acous_params[order(rf_perfomance_acous_params$error,
decreasing = FALSE), ]
# save as RDS
saveRDS(rf_perfomance_acous_params, "./data/processed/random_forest_performance by_acoustic_parameters_combinations.RDS")
Out-of-bar error by measuring method combination
rf_perfomance_acous_params <- readRDS("./data/processed/random_forest_performance by_acoustic_parameters_combinations.RDS")
kb <- kable(rf_perfomance_acous_params, row.names = FALSE)
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
print(kb)
parameters
|
error
|
sp
|
0.0457143
|
mfxc-sp
|
0.0542857
|
dtw-sp
|
0.0542857
|
spxc-sp
|
0.0571429
|
mfxc-dtw-sp
|
0.0628571
|
spxc-mfxc-dtw-sp
|
0.0657143
|
spxc-dtw-sp
|
0.0657143
|
spxc-mfxc-sp
|
0.0714286
|
mfcc-sp
|
0.1114286
|
mfcc-mfxc-dtw-sp
|
0.1142857
|
mfcc-dtw-sp
|
0.1171429
|
mfcc-spxc-sp
|
0.1200000
|
mfcc-spxc-mfxc-sp
|
0.1228571
|
mfcc-spxc-dtw-sp
|
0.1228571
|
mfcc-mfxc-sp
|
0.1314286
|
mfcc-spxc-mfxc-dtw-sp
|
0.1400000
|
dtw
|
0.6942857
|
spxc-dtw
|
0.7714286
|
mfxc-dtw
|
0.7885714
|
spxc-mfxc-dtw
|
0.8057143
|
spxc
|
0.9657143
|
mfcc-dtw
|
0.9685714
|
mfcc-spxc-dtw
|
0.9685714
|
mfcc-mfxc-dtw
|
0.9714286
|
spxc-mfxc
|
0.9714286
|
mfxc
|
0.9714286
|
mfcc-spxc-mfxc-dtw
|
0.9771429
|
mfcc-spxc-mfxc
|
0.9971429
|
mfcc
|
1.0000000
|
mfcc-mfxc
|
1.0000000
|
mfcc-spxc
|
1.0000000
|
Session information
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] soundgen_2.1.0 shinyBS_0.61 Sim.DiffProc_4.8 rlang_0.4.11
## [5] kableExtra_1.3.4 viridis_0.6.1 viridisLite_0.4.0 pbapply_1.4-3
## [9] e1071_1.7-7 caret_6.0-88 ggplot2_3.3.5 lattice_0.20-44
## [13] ranger_0.13.1 readxl_1.3.1 warbleR_1.1.27 NatureSounds_1.0.4
## [17] knitr_1.33 seewave_2.1.8 tuneR_1.3.3 devtools_2.4.2
## [21] usethis_2.0.1 lubridate_1.7.10 data.table_1.14.0
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-2 rjson_0.2.20 ellipsis_0.3.2
## [4] class_7.3-19 rprojroot_2.0.2 fs_1.5.0
## [7] rstudioapi_0.13 proxy_0.4-26 Deriv_4.1.3
## [10] remotes_2.4.0 prodlim_2019.11.13 fansi_0.5.0
## [13] xml2_1.3.2 codetools_0.2-18 splines_4.1.0
## [16] cachem_1.0.5 pkgload_1.2.1 jsonlite_1.7.2
## [19] pROC_1.17.0.1 shiny_1.6.0 compiler_4.1.0
## [22] httr_1.4.2 assertthat_0.2.1 Matrix_1.3-4
## [25] fastmap_1.1.0 cli_3.0.1 formatR_1.11
## [28] later_1.2.0 htmltools_0.5.1.1 prettyunits_1.1.1
## [31] tools_4.1.0 gtable_0.3.0 glue_1.4.2
## [34] reshape2_1.4.4 dplyr_1.0.7 Rcpp_1.0.7
## [37] cellranger_1.1.0 jquerylib_0.1.4 vctrs_0.3.8
## [40] svglite_2.0.0 nlme_3.1-152 iterators_1.0.13
## [43] timeDate_3043.102 gower_0.2.2 xfun_0.24
## [46] stringr_1.4.0 ps_1.6.0 testthat_3.0.4
## [49] rvest_1.0.0 mime_0.11 lifecycle_1.0.0
## [52] MASS_7.3-54 scales_1.1.1 ipred_0.9-11
## [55] promises_1.2.0.1 parallel_4.1.0 yaml_2.2.1
## [58] memoise_2.0.0 gridExtra_2.3 sass_0.4.0
## [61] rpart_4.1-15 stringi_1.7.3 highr_0.9
## [64] desc_1.3.0 foreach_1.5.1 pkgbuild_1.2.0
## [67] lava_1.6.9 pkgconfig_2.0.3 systemfonts_1.0.2
## [70] dtw_1.22-3 bitops_1.0-7 evaluate_0.14
## [73] purrr_0.3.4 recipes_0.1.16 processx_3.5.2
## [76] tidyselect_1.1.1 plyr_1.8.6 magrittr_2.0.1
## [79] R6_2.5.0 fftw_1.0-6 generics_0.1.0
## [82] DBI_1.1.1 pillar_1.6.1 withr_2.4.2
## [85] survival_3.2-11 RCurl_1.98-1.3 nnet_7.3-16
## [88] tibble_3.1.2 crayon_1.4.1 utf8_1.2.1
## [91] rmarkdown_2.9 grid_4.1.0 callr_3.7.0
## [94] ModelMetrics_1.2.2.2 digest_0.6.27 webshot_0.5.2
## [97] xtable_1.8-4 httpuv_1.6.1 signal_0.7-7
## [100] stats4_4.1.0 munsell_0.5.0 bslib_0.2.5.1
## [103] sessioninfo_1.1.1