Load packages

## add 'developer' to packages to be installed from github
x <- c("data.table", "lubridate", "devtools", "maRce10/warbleR", "readxl", "ranger", "caret", "e1071", "pbapply", "viridis", "ggplot2", "kableExtra", "rlang", "Sim.DiffProc", "soundgen")

aa <- lapply(x, function(y) {
  
  # get pakage name
  pkg <- strsplit(y, "/")[[1]]
  pkg <- pkg[length(pkg)]
  
  # check if installed, if not then install 
  if (!pkg %in% installed.packages()[,"Package"])  {

      if (grepl("/", y))  devtools::install_github(y, force = TRUE) else
    install.packages(y) 
    }

  # load package
  a <- try(require(pkg, character.only = T), silent = T)

  if (!a) remove.packages(pkg)
  })

Functions and global parameters

warbleR_options(wl = 300, parallel = 1, bp = c(0.5, 8), fast = TRUE, threshold = 15, ovlp = 20)

opts_knit$set(root.dir = "..")

# set evaluation false
opts_chunk$set( fig.width = 6, fig.height = 3, eval = FALSE, warning = FALSE, message = FALSE, tidy = TRUE)

num.trees <- 10000

Read data from Keen et al 2021

# read ext sel tab calls
labeled_calls <- readRDS("./data/raw/budgie call EST ms dataset.RDS")

# subset to unambiguous call types
labeled_calls <- labeled_calls[labeled_calls$Call.Type %in% grep("2", unique(labeled_calls$Call.Type),
    invert = TRUE, value = TRUE), ]

# resample to 22.05 kHz
labeled_calls <- resample_est_waves(labeled_calls, samp.rate = 22.05, pb = FALSE)
df <- as.data.frame(table(labeled_calls$Call.Type))

names(df) <- c("Call_type", "Sample_size")

kb <- kable(df, row.names = FALSE)

kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))

print(kb)
Call_type Sample_size
A 50
B 50
C 50
D 50
E 50
I 50
K 50

Measure acoustic parameters

# function to get acoustic parameters
acous_param_fun <- function(x) {

    # measure acoustics parameters
    sp <- spectro_analalysis(x, pb = FALSE, harmonicity = FALSE, threshold = 5)

    # measure cepstral coeffs
    cc <- mfcc_stats(x, pb = FALSE)[, -c(1, 2)]

    # spectrographic cross correlation
    spxc <- xcorr(x, pb = FALSE)

    # MDS
    spxc <- cmdscale(1 - spxc, k = 10, list. = TRUE)

    spxc_mds <- spxc$points

    colnames(spxc_mds) <- paste0("spxcMDS", 1:ncol(spxc_mds))

    # mfcc cross correlation
    mfccxc <- xcorr(x, pb = FALSE, type = "mfcc")

    # MDS
    mfccxc <- cmdscale(1 - mfccxc, k = 10, list. = TRUE)

    mfxc_mds <- mfccxc$points

    colnames(mfxc_mds) <- paste0("mfxcMDS", 1:ncol(mfxc_mds))

    # dynamic time warping
    fre_cntrs <- freq_ts(x, img = FALSE, pb = FALSE, threshold.time = 1)

    # replace NAs with mean values for each column
    if (anyNA(fre_cntrs))
        for (i in 3:ncol(fre_cntrs)) fre_cntrs[is.na(fre_cntrs[, i]), i] <- mean(fre_cntrs[,
            i], na.rm = TRUE)

    dtw.dists <- freq_DTW(x, img = FALSE, pb = FALSE, threshold.time = 1, ts.df = fre_cntrs)

    # MDS
    dtw_mds <- cmdscale(dtw.dists, k = 10, list. = TRUE)$points

    # fix colnames
    colnames(dtw_mds) <- paste0("dtwMDS", 1:ncol(dtw_mds))

    # put parameters in a list
    all_params <- data.frame(sp, cc, dtw_mds, spxc_mds, mfxc_mds)

    # scale for random forest
    all_params[, -c(1, 2)] <- scale(all_params[, -c(1, 2)])

    # add individual and experiment
    all_params$call.type <- x$Call.Type[1]

    return(all_params)
}

labeled_calls_l <- split(labeled_calls, labeled_calls$Call.Type)

# loop to measure acoustic parameters on each group
acous_param_l <- lapply(labeled_calls_l, FUN = function(x) try(acous_param_fun(x),
    silent = TRUE))


acous_param <- do.call(rbind, acous_param_l)

names(acous_param)[(sapply(acous_param, anyNA))]

# save as RDS
saveRDS(acous_param, "./data/processed/acoustic_parameters_test_for_best_parameter_combination.RDS")

Run random forest with different acoustic parameter subsets

# read acoustic parameter data
acous_param <- readRDS("./data/processed/acoustic_parameters_test_for_best_parameter_combination.RDS")

# exclude groups in which the solo flight audio is uncertain for some
# individuals

# which parameters would be measured
param_categories <- c("mfcc", "spxc", "mfxc", "dtw", "sp")

# get actual parameter names
col_names <- names(acous_param)
col_names <- col_names[!col_names %in% c("sound.files", "selec", "call.type")]

# measurement category for each measruremnt
clss_col_names <- col_names

# name it by measurement function
for (i in 1:length(param_categories)) clss_col_names[if (param_categories[i] != "sp") grepl(c("cc",
    "spxc", "mfxc", "dtw")[i], col_names) else !grepl("cc|xc|dtw|indiv|sound.files|selec",
    col_names)] <- param_categories[i]

# cbind(col_names, clss_col_names)


# all posible combinations
combs4 <- combn(param_categories, 4)
combs3 <- combn(param_categories, 3)
combs2 <- combn(param_categories, 2)

# ake it a list
combs <- c(as.data.frame(combs4), as.data.frame(combs3), as.data.frame(combs2))

# add all 4 parameters as an option to the list
combs <- c(append(combs, list(param_categories)), param_categories)

combs <- sample(combs)

# loop
out <- pblapply(1:length(combs), cl = 3, function(i) {

    # subset columns to keep only those from selected acoustic measurements
    X <- acous_param[, c(col_names[clss_col_names %in% c(combs[[i]])], "call.type")]

    # make it a factor for ranger to work
    X$call.type <- as.factor(X$call.type)

    # run RF model spectral and cepstral parameters
    rfm <- ranger(call.type ~ ., data = X, num.trees = num.trees, importance = "impurity")

    res <- data.frame(parameters = paste(combs[[i]], collapse = "-"), error = rfm$prediction.error)
    return(res)
})

rf_perfomance_acous_params <- do.call(rbind, out)

rf_perfomance_acous_params <- rf_perfomance_acous_params[order(rf_perfomance_acous_params$error,
    decreasing = FALSE), ]

# save as RDS
saveRDS(rf_perfomance_acous_params, "./data/processed/random_forest_performance by_acoustic_parameters_combinations.RDS")

Out-of-bar error by measuring method combination

rf_perfomance_acous_params <- readRDS("./data/processed/random_forest_performance by_acoustic_parameters_combinations.RDS")

kb <- kable(rf_perfomance_acous_params, row.names = FALSE)

kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))

print(kb)
parameters error
sp 0.0457143
mfxc-sp 0.0542857
dtw-sp 0.0542857
spxc-sp 0.0571429
mfxc-dtw-sp 0.0628571
spxc-mfxc-dtw-sp 0.0657143
spxc-dtw-sp 0.0657143
spxc-mfxc-sp 0.0714286
mfcc-sp 0.1114286
mfcc-mfxc-dtw-sp 0.1142857
mfcc-dtw-sp 0.1171429
mfcc-spxc-sp 0.1200000
mfcc-spxc-mfxc-sp 0.1228571
mfcc-spxc-dtw-sp 0.1228571
mfcc-mfxc-sp 0.1314286
mfcc-spxc-mfxc-dtw-sp 0.1400000
dtw 0.6942857
spxc-dtw 0.7714286
mfxc-dtw 0.7885714
spxc-mfxc-dtw 0.8057143
spxc 0.9657143
mfcc-dtw 0.9685714
mfcc-spxc-dtw 0.9685714
mfcc-mfxc-dtw 0.9714286
spxc-mfxc 0.9714286
mfxc 0.9714286
mfcc-spxc-mfxc-dtw 0.9771429
mfcc-spxc-mfxc 0.9971429
mfcc 1.0000000
mfcc-mfxc 1.0000000
mfcc-spxc 1.0000000

Session information

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_BR.UTF-8    
##  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_BR.UTF-8   
##  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] soundgen_2.1.0     shinyBS_0.61       Sim.DiffProc_4.8   rlang_0.4.11      
##  [5] kableExtra_1.3.4   viridis_0.6.1      viridisLite_0.4.0  pbapply_1.4-3     
##  [9] e1071_1.7-7        caret_6.0-88       ggplot2_3.3.5      lattice_0.20-44   
## [13] ranger_0.13.1      readxl_1.3.1       warbleR_1.1.27     NatureSounds_1.0.4
## [17] knitr_1.33         seewave_2.1.8      tuneR_1.3.3        devtools_2.4.2    
## [21] usethis_2.0.1      lubridate_1.7.10   data.table_1.14.0 
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-2     rjson_0.2.20         ellipsis_0.3.2      
##   [4] class_7.3-19         rprojroot_2.0.2      fs_1.5.0            
##   [7] rstudioapi_0.13      proxy_0.4-26         Deriv_4.1.3         
##  [10] remotes_2.4.0        prodlim_2019.11.13   fansi_0.5.0         
##  [13] xml2_1.3.2           codetools_0.2-18     splines_4.1.0       
##  [16] cachem_1.0.5         pkgload_1.2.1        jsonlite_1.7.2      
##  [19] pROC_1.17.0.1        shiny_1.6.0          compiler_4.1.0      
##  [22] httr_1.4.2           assertthat_0.2.1     Matrix_1.3-4        
##  [25] fastmap_1.1.0        cli_3.0.1            formatR_1.11        
##  [28] later_1.2.0          htmltools_0.5.1.1    prettyunits_1.1.1   
##  [31] tools_4.1.0          gtable_0.3.0         glue_1.4.2          
##  [34] reshape2_1.4.4       dplyr_1.0.7          Rcpp_1.0.7          
##  [37] cellranger_1.1.0     jquerylib_0.1.4      vctrs_0.3.8         
##  [40] svglite_2.0.0        nlme_3.1-152         iterators_1.0.13    
##  [43] timeDate_3043.102    gower_0.2.2          xfun_0.24           
##  [46] stringr_1.4.0        ps_1.6.0             testthat_3.0.4      
##  [49] rvest_1.0.0          mime_0.11            lifecycle_1.0.0     
##  [52] MASS_7.3-54          scales_1.1.1         ipred_0.9-11        
##  [55] promises_1.2.0.1     parallel_4.1.0       yaml_2.2.1          
##  [58] memoise_2.0.0        gridExtra_2.3        sass_0.4.0          
##  [61] rpart_4.1-15         stringi_1.7.3        highr_0.9           
##  [64] desc_1.3.0           foreach_1.5.1        pkgbuild_1.2.0      
##  [67] lava_1.6.9           pkgconfig_2.0.3      systemfonts_1.0.2   
##  [70] dtw_1.22-3           bitops_1.0-7         evaluate_0.14       
##  [73] purrr_0.3.4          recipes_0.1.16       processx_3.5.2      
##  [76] tidyselect_1.1.1     plyr_1.8.6           magrittr_2.0.1      
##  [79] R6_2.5.0             fftw_1.0-6           generics_0.1.0      
##  [82] DBI_1.1.1            pillar_1.6.1         withr_2.4.2         
##  [85] survival_3.2-11      RCurl_1.98-1.3       nnet_7.3-16         
##  [88] tibble_3.1.2         crayon_1.4.1         utf8_1.2.1          
##  [91] rmarkdown_2.9        grid_4.1.0           callr_3.7.0         
##  [94] ModelMetrics_1.2.2.2 digest_0.6.27        webshot_0.5.2       
##  [97] xtable_1.8-4         httpuv_1.6.1         signal_0.7-7        
## [100] stats4_4.1.0         munsell_0.5.0        bslib_0.2.5.1       
## [103] sessioninfo_1.1.1