Load packages

## add 'developer' to packages to be installed from github
x <- c("data.table", "lubridate", "devtools", "maRce10/warbleR", "readxl", "ranger", "caret", "e1071", "pbapply", "viridis", "ggplot2", "kableExtra", "rlang", "Sim.DiffProc", "soundgen")

aa <- lapply(x, function(y) {
  
  # get pakage name
  pkg <- strsplit(y, "/")[[1]]
  pkg <- pkg[length(pkg)]
  
  # check if installed, if not then install 
  if (!pkg %in% installed.packages()[,"Package"])  {

      if (grepl("/", y))  devtools::install_github(y, force = TRUE) else
    install.packages(y) 
    }

  # load package
  a <- try(require(pkg, character.only = T), silent = T)

  if (!a) remove.packages(pkg)
  })

Functions and global parameters

warbleR_options(wl = 300, parallel = 1, bp = c(0.5, 8), fast = TRUE, threshold = 15, ovlp = 20)

opts_knit$set(root.dir = "..")

# set evaluation false
opts_chunk$set( fig.width = 6, fig.height = 3, eval = FALSE, warning = FALSE, message = FALSE, tidy = TRUE)

num.trees <- 10000

Read data from Keen et al 2021

# read ext sel tab calls
labeled_calls <- readRDS("./data/raw/budgie call EST ms dataset.RDS")

# subset to unambiguous call types
labeled_calls <- labeled_calls[labeled_calls$Call.Type %in% grep("2", unique(labeled_calls$Call.Type), invert = TRUE, value = TRUE), ]

# resample to 22.05 kHz
labeled_calls <- resample_est_waves(labeled_calls, samp.rate = 22.05, pb = FALSE)
df <- as.data.frame(table(labeled_calls$Call.Type))

names(df) <- c("Call_type", "Sample_size")

kb <-kable(df, row.names = FALSE) 
  
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
  
print(kb)
Call_type Sample_size
A 50
B 50
C 50
D 50
E 50
I 50
K 50

Measure acoustic parameters

# function to get acoustic parameters
acous_param_fun <- function(x) {
  
    # measure acoustics parameters
    sp <- specan(x, pb = FALSE, harmonicity = FALSE, threshold = 5)
    
    # measure cepstral coeffs
    cc <- mfcc_stats(x, pb = FALSE)[, -c(1, 2)]
  
    # spectrographic cross correlation
    spxc <- xcorr(x, pb = FALSE)
    
    # MDS
    spxc <- cmdscale(1 - spxc, k = 10, list. = TRUE)
  
    spxc_mds <- spxc$points
    
    colnames(spxc_mds) <- paste0("spxcMDS", 1:ncol(spxc_mds))
     
    # mfcc cross correlation
    mfccxc <- xcorr(x, pb = FALSE, type = "mfcc")
    
    # MDS
    mfccxc <- cmdscale(1 - mfccxc, k = 10, list. = TRUE)
  
    mfxc_mds <- mfccxc$points
    
    colnames(mfxc_mds) <- paste0("mfxcMDS", 1:ncol(mfxc_mds))
  
    # dynamic time warping
    fre_cntrs <- freq_ts(x, img = FALSE, pb = FALSE, threshold.time = 1)
  
    # replace NAs with mean values for each column
    if (anyNA(fre_cntrs))
    for(i in 3:ncol(fre_cntrs))
      fre_cntrs[is.na(fre_cntrs[,i]), i] <- mean(fre_cntrs[, i], na.rm = TRUE)
      
    dtw.dists <- freq_DTW(x, img = FALSE, pb = FALSE, threshold.time = 1, ts.df = fre_cntrs)
  
    # MDS
    dtw_mds <- cmdscale(dtw.dists, k = 10, list. = TRUE)$points
    
    # fix colnames
    colnames(dtw_mds) <- paste0("dtwMDS", 1:ncol(dtw_mds))
  
    # put parameters in a list
    all_params <- data.frame(sp, cc, dtw_mds, spxc_mds, mfxc_mds)
    
    # scale for random forest
    all_params[,-c(1, 2)] <- scale(all_params[,-c(1, 2)])
    
    # add individual and experiment
    all_params$call.type <- x$Call.Type[1]

    return(all_params)
    } 

labeled_calls_l <- split(labeled_calls, labeled_calls$Call.Type)

# loop to measure acoustic parameters on each group
acous_param_l <- lapply(labeled_calls_l, FUN = function(x) try(acous_param_fun(x), silent = TRUE))


acous_param <- do.call(rbind, acous_param_l)

names(acous_param)[(sapply(acous_param, anyNA))]

# save as RDS
saveRDS(acous_param, "./data/processed/acoustic_parameters_test_for_best_parameter_combination.RDS")

Run random forest with different acoustic parameter subsets

# read acoustic parameter data
acous_param <- readRDS("./data/processed/acoustic_parameters_test_for_best_parameter_combination.RDS")

# exclude groups in which the solo flight audio is uncertain for some individuals 

# which parameters would be measured
param_categories <- c("mfcc",  "spxc", "mfxc", "dtw", "sp")

# get actual parameter names
col_names <- names(acous_param)
col_names <- col_names[!col_names %in% c("sound.files", "selec","call.type")]

# measurement category for each measruremnt
clss_col_names <- col_names

# name it by measurement function 
for (i in  1:length(param_categories))
 clss_col_names[if (param_categories[i] != "sp") 
                            grepl(c("cc", "spxc", "mfxc", "dtw")[i], col_names) else
                            !grepl("cc|xc|dtw|indiv|sound.files|selec", col_names)] <- param_categories[i]

# cbind(col_names, clss_col_names)


# all posible combinations
combs4 <- combn(param_categories, 4)
combs3 <- combn(param_categories, 3)
combs2 <- combn(param_categories, 2)

# ake it a list
combs <- c(as.data.frame(combs4), as.data.frame(combs3), as.data.frame(combs2))

# add all 4 parameters as an option to the list
combs <- c(append(combs, list(param_categories)), param_categories)

combs <- sample(combs)

# loop
out <- pblapply(1:length(combs), cl = 3, function(i) {
    
    # subset columns to keep only those from selected acoustic measurements 
    X <- acous_param[ , c(col_names[clss_col_names %in% c(combs[[i]])], "call.type")]
   
  # make it a factor for ranger to work 
  X$call.type <- as.factor(X$call.type)
  
  # run RF model spectral and cepstral parameters
  rfm <- ranger(
      call.type ~ .,
      data = X,
      num.trees = num.trees,
      importance = "impurity")
    
  res <- data.frame(parameters = paste(combs[[i]], collapse = "-"), error = rfm$prediction.error)  
  return(res)
})

rf_perfomance_acous_params <- do.call(rbind, out)

rf_perfomance_acous_params <- rf_perfomance_acous_params[order(rf_perfomance_acous_params$error, decreasing = FALSE), ]

# save as RDS
saveRDS(rf_perfomance_acous_params, "./data/processed/random_forest_performance by_acoustic_parameters_combinations.RDS")

Out-of-bar error by measuring method combination

rf_perfomance_acous_params <- readRDS("./data/processed/random_forest_performance by_acoustic_parameters_combinations.RDS")

kb <-kable(rf_perfomance_acous_params, row.names = FALSE) 
  
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
  
print(kb)
parameters error
sp 0.0457143
mfxc-sp 0.0542857
dtw-sp 0.0542857
spxc-sp 0.0571429
mfxc-dtw-sp 0.0628571
spxc-mfxc-dtw-sp 0.0657143
spxc-dtw-sp 0.0657143
spxc-mfxc-sp 0.0714286
mfcc-sp 0.1114286
mfcc-mfxc-dtw-sp 0.1142857
mfcc-dtw-sp 0.1171429
mfcc-spxc-sp 0.1200000
mfcc-spxc-mfxc-sp 0.1228571
mfcc-spxc-dtw-sp 0.1228571
mfcc-mfxc-sp 0.1314286
mfcc-spxc-mfxc-dtw-sp 0.1400000
dtw 0.6942857
spxc-dtw 0.7714286
mfxc-dtw 0.7885714
spxc-mfxc-dtw 0.8057143
spxc 0.9657143
mfcc-dtw 0.9685714
mfcc-spxc-dtw 0.9685714
mfcc-mfxc-dtw 0.9714286
spxc-mfxc 0.9714286
mfxc 0.9714286
mfcc-spxc-mfxc-dtw 0.9771429
mfcc-spxc-mfxc 0.9971429
mfcc 1.0000000
mfcc-mfxc 1.0000000
mfcc-spxc 1.0000000

Session information

## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_BR.UTF-8    
##  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_BR.UTF-8   
##  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] soundgen_2.1.0     shinyBS_0.61       Sim.DiffProc_4.8   rlang_0.4.11      
##  [5] kableExtra_1.3.4   viridis_0.6.1      viridisLite_0.4.0  pbapply_1.4-3     
##  [9] e1071_1.7-7        caret_6.0-88       ggplot2_3.3.4      lattice_0.20-44   
## [13] ranger_0.12.1      readxl_1.3.1       warbleR_1.1.27     NatureSounds_1.0.4
## [17] knitr_1.33         seewave_2.1.6      tuneR_1.3.3        devtools_2.4.2    
## [21] usethis_2.0.1      lubridate_1.7.10   data.table_1.14.0 
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-2     rjson_0.2.20         ellipsis_0.3.2      
##   [4] class_7.3-19         rprojroot_2.0.2      fs_1.5.0            
##   [7] rstudioapi_0.13      proxy_0.4-26         Deriv_4.1.3         
##  [10] remotes_2.4.0        prodlim_2019.11.13   fansi_0.5.0         
##  [13] xml2_1.3.2           codetools_0.2-18     splines_4.0.5       
##  [16] cachem_1.0.5         pkgload_1.2.1        jsonlite_1.7.2      
##  [19] pROC_1.17.0.1        shiny_1.6.0          compiler_4.0.5      
##  [22] httr_1.4.2           assertthat_0.2.1     Matrix_1.3-3        
##  [25] fastmap_1.1.0        cli_2.5.0            later_1.2.0         
##  [28] htmltools_0.5.1.1    prettyunits_1.1.1    tools_4.0.5         
##  [31] gtable_0.3.0         glue_1.4.2           reshape2_1.4.4      
##  [34] dplyr_1.0.7          Rcpp_1.0.6           cellranger_1.1.0    
##  [37] jquerylib_0.1.4      vctrs_0.3.8          svglite_2.0.0       
##  [40] nlme_3.1-152         iterators_1.0.13     timeDate_3043.102   
##  [43] gower_0.2.2          xfun_0.23            stringr_1.4.0       
##  [46] ps_1.6.0             testthat_3.0.3       rvest_1.0.0         
##  [49] mime_0.10            lifecycle_1.0.0      MASS_7.3-54         
##  [52] scales_1.1.1         ipred_0.9-11         promises_1.2.0.1    
##  [55] parallel_4.0.5       yaml_2.2.1           memoise_2.0.0       
##  [58] gridExtra_2.3        sass_0.4.0           rpart_4.1-15        
##  [61] stringi_1.6.2        highr_0.9            desc_1.3.0          
##  [64] foreach_1.5.1        pkgbuild_1.2.0       lava_1.6.9          
##  [67] pkgconfig_2.0.3      systemfonts_1.0.2    dtw_1.22-3          
##  [70] bitops_1.0-7         evaluate_0.14        purrr_0.3.4         
##  [73] recipes_0.1.16       processx_3.5.2       tidyselect_1.1.1    
##  [76] plyr_1.8.6           magrittr_2.0.1       R6_2.5.0            
##  [79] fftw_1.0-6           generics_0.1.0       DBI_1.1.1           
##  [82] pillar_1.6.1         withr_2.4.2          survival_3.2-11     
##  [85] RCurl_1.98-1.3       nnet_7.3-16          tibble_3.1.2        
##  [88] crayon_1.4.1         utf8_1.2.1           rmarkdown_2.8       
##  [91] grid_4.0.5           callr_3.7.0          ModelMetrics_1.2.2.2
##  [94] digest_0.6.27        webshot_0.5.2        xtable_1.8-4        
##  [97] httpuv_1.6.1         signal_0.7-7         stats4_4.0.5        
## [100] munsell_0.5.0        bslib_0.2.5.1        sessioninfo_1.1.1