Load packages
## add 'developer' to packages to be installed from github
x <- c("data.table", "lubridate", "devtools", "maRce10/warbleR", "readxl", "ranger", "caret", "e1071", "pbapply", "viridis", "ggplot2", "kableExtra", "rlang", "Sim.DiffProc", "soundgen")
aa <- lapply(x, function(y) {
# get pakage name
pkg <- strsplit(y, "/")[[1]]
pkg <- pkg[length(pkg)]
# check if installed, if not then install
if (!pkg %in% installed.packages()[,"Package"]) {
if (grepl("/", y)) devtools::install_github(y, force = TRUE) else
install.packages(y)
}
# load package
a <- try(require(pkg, character.only = T), silent = T)
if (!a) remove.packages(pkg)
})
Functions and global parameters
warbleR_options(wl = 300, parallel = 1, bp = c(0.5, 8), fast = TRUE, threshold = 15, ovlp = 20)
opts_knit$set(root.dir = "..")
# set evaluation false
opts_chunk$set( fig.width = 6, fig.height = 3, eval = FALSE, warning = FALSE, message = FALSE, tidy = TRUE)
num.trees <- 10000
Read data from Keen et al 2021
# read ext sel tab calls
labeled_calls <- readRDS("./data/raw/budgie call EST ms dataset.RDS")
# subset to unambiguous call types
labeled_calls <- labeled_calls[labeled_calls$Call.Type %in% grep("2", unique(labeled_calls$Call.Type), invert = TRUE, value = TRUE), ]
# resample to 22.05 kHz
labeled_calls <- resample_est_waves(labeled_calls, samp.rate = 22.05, pb = FALSE)
df <- as.data.frame(table(labeled_calls$Call.Type))
names(df) <- c("Call_type", "Sample_size")
kb <-kable(df, row.names = FALSE)
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
print(kb)
Call_type
|
Sample_size
|
A
|
50
|
B
|
50
|
C
|
50
|
D
|
50
|
E
|
50
|
I
|
50
|
K
|
50
|
Measure acoustic parameters
# function to get acoustic parameters
acous_param_fun <- function(x) {
# measure acoustics parameters
sp <- specan(x, pb = FALSE, harmonicity = FALSE, threshold = 5)
# measure cepstral coeffs
cc <- mfcc_stats(x, pb = FALSE)[, -c(1, 2)]
# spectrographic cross correlation
spxc <- xcorr(x, pb = FALSE)
# MDS
spxc <- cmdscale(1 - spxc, k = 10, list. = TRUE)
spxc_mds <- spxc$points
colnames(spxc_mds) <- paste0("spxcMDS", 1:ncol(spxc_mds))
# mfcc cross correlation
mfccxc <- xcorr(x, pb = FALSE, type = "mfcc")
# MDS
mfccxc <- cmdscale(1 - mfccxc, k = 10, list. = TRUE)
mfxc_mds <- mfccxc$points
colnames(mfxc_mds) <- paste0("mfxcMDS", 1:ncol(mfxc_mds))
# dynamic time warping
fre_cntrs <- freq_ts(x, img = FALSE, pb = FALSE, threshold.time = 1)
# replace NAs with mean values for each column
if (anyNA(fre_cntrs))
for(i in 3:ncol(fre_cntrs))
fre_cntrs[is.na(fre_cntrs[,i]), i] <- mean(fre_cntrs[, i], na.rm = TRUE)
dtw.dists <- freq_DTW(x, img = FALSE, pb = FALSE, threshold.time = 1, ts.df = fre_cntrs)
# MDS
dtw_mds <- cmdscale(dtw.dists, k = 10, list. = TRUE)$points
# fix colnames
colnames(dtw_mds) <- paste0("dtwMDS", 1:ncol(dtw_mds))
# put parameters in a list
all_params <- data.frame(sp, cc, dtw_mds, spxc_mds, mfxc_mds)
# scale for random forest
all_params[,-c(1, 2)] <- scale(all_params[,-c(1, 2)])
# add individual and experiment
all_params$call.type <- x$Call.Type[1]
return(all_params)
}
labeled_calls_l <- split(labeled_calls, labeled_calls$Call.Type)
# loop to measure acoustic parameters on each group
acous_param_l <- lapply(labeled_calls_l, FUN = function(x) try(acous_param_fun(x), silent = TRUE))
acous_param <- do.call(rbind, acous_param_l)
names(acous_param)[(sapply(acous_param, anyNA))]
# save as RDS
saveRDS(acous_param, "./data/processed/acoustic_parameters_test_for_best_parameter_combination.RDS")
Run random forest with different acoustic parameter subsets
# read acoustic parameter data
acous_param <- readRDS("./data/processed/acoustic_parameters_test_for_best_parameter_combination.RDS")
# exclude groups in which the solo flight audio is uncertain for some individuals
# which parameters would be measured
param_categories <- c("mfcc", "spxc", "mfxc", "dtw", "sp")
# get actual parameter names
col_names <- names(acous_param)
col_names <- col_names[!col_names %in% c("sound.files", "selec","call.type")]
# measurement category for each measruremnt
clss_col_names <- col_names
# name it by measurement function
for (i in 1:length(param_categories))
clss_col_names[if (param_categories[i] != "sp")
grepl(c("cc", "spxc", "mfxc", "dtw")[i], col_names) else
!grepl("cc|xc|dtw|indiv|sound.files|selec", col_names)] <- param_categories[i]
# cbind(col_names, clss_col_names)
# all posible combinations
combs4 <- combn(param_categories, 4)
combs3 <- combn(param_categories, 3)
combs2 <- combn(param_categories, 2)
# ake it a list
combs <- c(as.data.frame(combs4), as.data.frame(combs3), as.data.frame(combs2))
# add all 4 parameters as an option to the list
combs <- c(append(combs, list(param_categories)), param_categories)
combs <- sample(combs)
# loop
out <- pblapply(1:length(combs), cl = 3, function(i) {
# subset columns to keep only those from selected acoustic measurements
X <- acous_param[ , c(col_names[clss_col_names %in% c(combs[[i]])], "call.type")]
# make it a factor for ranger to work
X$call.type <- as.factor(X$call.type)
# run RF model spectral and cepstral parameters
rfm <- ranger(
call.type ~ .,
data = X,
num.trees = num.trees,
importance = "impurity")
res <- data.frame(parameters = paste(combs[[i]], collapse = "-"), error = rfm$prediction.error)
return(res)
})
rf_perfomance_acous_params <- do.call(rbind, out)
rf_perfomance_acous_params <- rf_perfomance_acous_params[order(rf_perfomance_acous_params$error, decreasing = FALSE), ]
# save as RDS
saveRDS(rf_perfomance_acous_params, "./data/processed/random_forest_performance by_acoustic_parameters_combinations.RDS")
Out-of-bar error by measuring method combination
rf_perfomance_acous_params <- readRDS("./data/processed/random_forest_performance by_acoustic_parameters_combinations.RDS")
kb <-kable(rf_perfomance_acous_params, row.names = FALSE)
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
print(kb)
parameters
|
error
|
sp
|
0.0457143
|
mfxc-sp
|
0.0542857
|
dtw-sp
|
0.0542857
|
spxc-sp
|
0.0571429
|
mfxc-dtw-sp
|
0.0628571
|
spxc-mfxc-dtw-sp
|
0.0657143
|
spxc-dtw-sp
|
0.0657143
|
spxc-mfxc-sp
|
0.0714286
|
mfcc-sp
|
0.1114286
|
mfcc-mfxc-dtw-sp
|
0.1142857
|
mfcc-dtw-sp
|
0.1171429
|
mfcc-spxc-sp
|
0.1200000
|
mfcc-spxc-mfxc-sp
|
0.1228571
|
mfcc-spxc-dtw-sp
|
0.1228571
|
mfcc-mfxc-sp
|
0.1314286
|
mfcc-spxc-mfxc-dtw-sp
|
0.1400000
|
dtw
|
0.6942857
|
spxc-dtw
|
0.7714286
|
mfxc-dtw
|
0.7885714
|
spxc-mfxc-dtw
|
0.8057143
|
spxc
|
0.9657143
|
mfcc-dtw
|
0.9685714
|
mfcc-spxc-dtw
|
0.9685714
|
mfcc-mfxc-dtw
|
0.9714286
|
spxc-mfxc
|
0.9714286
|
mfxc
|
0.9714286
|
mfcc-spxc-mfxc-dtw
|
0.9771429
|
mfcc-spxc-mfxc
|
0.9971429
|
mfcc
|
1.0000000
|
mfcc-mfxc
|
1.0000000
|
mfcc-spxc
|
1.0000000
|
Session information
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] soundgen_2.1.0 shinyBS_0.61 Sim.DiffProc_4.8 rlang_0.4.11
## [5] kableExtra_1.3.4 viridis_0.6.1 viridisLite_0.4.0 pbapply_1.4-3
## [9] e1071_1.7-7 caret_6.0-88 ggplot2_3.3.4 lattice_0.20-44
## [13] ranger_0.12.1 readxl_1.3.1 warbleR_1.1.27 NatureSounds_1.0.4
## [17] knitr_1.33 seewave_2.1.6 tuneR_1.3.3 devtools_2.4.2
## [21] usethis_2.0.1 lubridate_1.7.10 data.table_1.14.0
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-2 rjson_0.2.20 ellipsis_0.3.2
## [4] class_7.3-19 rprojroot_2.0.2 fs_1.5.0
## [7] rstudioapi_0.13 proxy_0.4-26 Deriv_4.1.3
## [10] remotes_2.4.0 prodlim_2019.11.13 fansi_0.5.0
## [13] xml2_1.3.2 codetools_0.2-18 splines_4.0.5
## [16] cachem_1.0.5 pkgload_1.2.1 jsonlite_1.7.2
## [19] pROC_1.17.0.1 shiny_1.6.0 compiler_4.0.5
## [22] httr_1.4.2 assertthat_0.2.1 Matrix_1.3-3
## [25] fastmap_1.1.0 cli_2.5.0 later_1.2.0
## [28] htmltools_0.5.1.1 prettyunits_1.1.1 tools_4.0.5
## [31] gtable_0.3.0 glue_1.4.2 reshape2_1.4.4
## [34] dplyr_1.0.7 Rcpp_1.0.6 cellranger_1.1.0
## [37] jquerylib_0.1.4 vctrs_0.3.8 svglite_2.0.0
## [40] nlme_3.1-152 iterators_1.0.13 timeDate_3043.102
## [43] gower_0.2.2 xfun_0.23 stringr_1.4.0
## [46] ps_1.6.0 testthat_3.0.3 rvest_1.0.0
## [49] mime_0.10 lifecycle_1.0.0 MASS_7.3-54
## [52] scales_1.1.1 ipred_0.9-11 promises_1.2.0.1
## [55] parallel_4.0.5 yaml_2.2.1 memoise_2.0.0
## [58] gridExtra_2.3 sass_0.4.0 rpart_4.1-15
## [61] stringi_1.6.2 highr_0.9 desc_1.3.0
## [64] foreach_1.5.1 pkgbuild_1.2.0 lava_1.6.9
## [67] pkgconfig_2.0.3 systemfonts_1.0.2 dtw_1.22-3
## [70] bitops_1.0-7 evaluate_0.14 purrr_0.3.4
## [73] recipes_0.1.16 processx_3.5.2 tidyselect_1.1.1
## [76] plyr_1.8.6 magrittr_2.0.1 R6_2.5.0
## [79] fftw_1.0-6 generics_0.1.0 DBI_1.1.1
## [82] pillar_1.6.1 withr_2.4.2 survival_3.2-11
## [85] RCurl_1.98-1.3 nnet_7.3-16 tibble_3.1.2
## [88] crayon_1.4.1 utf8_1.2.1 rmarkdown_2.8
## [91] grid_4.0.5 callr_3.7.0 ModelMetrics_1.2.2.2
## [94] digest_0.6.27 webshot_0.5.2 xtable_1.8-4
## [97] httpuv_1.6.1 signal_0.7-7 stats4_4.0.5
## [100] munsell_0.5.0 bslib_0.2.5.1 sessioninfo_1.1.1