Load packages

## add 'developer/' to packages to be installed from github
x <- c(
  "pbapply", "viridis", "knitr", "remotes", "maRce10/warbleR",
  "maRce10/ohun", "DT", "ranger"
)

# For every entry in `x`: install the package when it is missing, then attach
# it. `aa` collects the value returned by each `require()` call.
aa <- lapply(x, function(y) {
  # get package name: the part after the last "/" (drops the GitHub user)
  pkg <- strsplit(y, "/")[[1]]
  pkg <- pkg[length(pkg)]

  # check if installed, if not then install
  # system.file() returns "" for a package that is not installed; it avoids
  # re-reading the whole library with installed.packages() on every iteration
  if (!nzchar(system.file(package = pkg))) {
    if (grepl("/", y)) {
      remotes::install_github(y, force = TRUE)
    } else {
      install.packages(y)
    }
  }

  # load package
  try(require(pkg, character.only = TRUE), silent = TRUE)
})

Set functions and global parameters

# palette of 10 viridis colours with alpha 0.6
cols <- viridis(n = 10, alpha = 0.6)

# set warbleR's global `wav.path` option to the raw data folder
warbleR_options(wav.path = "./data/raw")

Bat calls data

File descriptions

manual_detections_Raven_bats4testing.csv: Manual detection of all visible bat vocalizations (Raven)

manual_detections_bats4testing.csv: Manual detection of all high quality bat vocalizations (Avisoft)

automated_detections_bats4testing.csv: Automated detection with Erick’s function

Read data

# exhaustive manual reference: all visible bat vocalizations (Raven)
reference_all <- read.csv(
  file = "./data/raw/manual_detections_Raven_bats4testing.csv"
)

# high quality manual reference (Avisoft)
reference_best <- read.csv(
  file = "./data/raw/manual_detections_bats4testing.csv"
)

# automated detections
ericks_detection <- read.csv(
  file = "./data/raw/automated_detections_bats4testing.csv"
)

Check references

Exhaustive detection (all signals)

# all: run the selection checks on the exhaustive reference
# (every visible call, annotated in Raven)
check_sels(reference_all, pb = FALSE)

## all selections are OK

# summarise selection duration, gap duration and duty cycle (min / mean / max)
# for the exhaustive reference
feature_reference(reference_all, path = "./data/raw")

##              min        mean        max       
## sel.duration 0.00046883 0.003376288 0.03785942
## gap.duration 0.01268074 0.08107097  0.938257  
## duty.cycle   0.03712569 0.006502513 0.1009111

High quality subset

# run the selection checks on the high quality reference subset
check_sels(reference_best, pb = FALSE)

## all selections are OK

# summarise selection duration, gap duration and duty cycle (min / mean / max)
# for the high quality subset
feature_reference(reference_best, path = "./data/raw")

##              min        mean        max     
## sel.duration 0.000208   0.003602071 0.03638 
## gap.duration 0.012139   0.1011788   0.937004
## duty.cycle   0.03020532 0.004873684 0.08718

Detection from Erick’s algorithm

# run the selection checks on the automated detections
check_sels(ericks_detection, pb = FALSE)

## all selections are OK but some have very few samples (less than 20, potentially problematic for some analyses) 
## Check 'min.n.samples' column

# summarise selection duration, gap duration and duty cycle (min / mean / max)
# for the automated detections
feature_reference(ericks_detection, path = "./data/raw")

##              min          mean        max      
## sel.duration 2.604167e-06 0.005757727 0.2316797
## gap.duration 0.001611979  0.07480331  0.936732 
## duty.cycle   0.05292565   0.006045013 0.251075

Compare references (high quality vs exhaustive)

# all vs best: score the high quality subset as if it were a detection of the
# exhaustive reference
diagnose_detection(reference = reference_all, detection = reference_best)

##   true.positives false.positives false.negatives split.positives
## 1            139               1              59               1
##   merged.positives overlap.to.true.positives sensitivity specificity
## 1                0                 0.7612143   0.7020202   0.9928571

High quality reference has ~70% of all signals

Diagnose Erick’s detection

Compare to exhaustive detection:

# score the automated detections against the exhaustive reference
diagnose_detection(reference = reference_all, detection = ericks_detection)

##   true.positives false.positives false.negatives split.positives
## 1             98              74             100               4
##   merged.positives overlap.to.true.positives sensitivity specificity
## 1                2                  0.626583   0.4949495   0.5697674

Erick’s algorithm detected ~50% of all signals
43% of the detected signals were noise (no bat calls)

Compare to high quality subset:

# score the automated detections against the high quality reference subset
diagnose_detection(reference = reference_best, detection = ericks_detection)

##   true.positives false.positives false.negatives split.positives
## 1            100              73              41               3
##   merged.positives overlap.to.true.positives sensitivity specificity
## 1                2                 0.6925425   0.7092199   0.5780347

Erick’s algorithm detected ~70% of high quality signals
42% of the detected signals were noise (no bat calls)

Run ohun energy detector

# Run the energy detector optimization against one reference table.
# The parameter grid is defined once here so the three runs below are
# guaranteed to explore exactly the same combinations.
#
# reference: table of reference selections to optimize against.
# Returns whatever `optimize_energy_detector()` returns for that reference.
run_energy_optimization <- function(reference) {
  optimize_energy_detector(
    reference = reference,
    threshold = c(0.5, 1:4),
    smooth = 1:4,
    hold.time = 1:8,
    min.duration = c(0.05, 0.1, 0.2, 0.3, 0.4, 1, 2),
    max.duration = c(30, 40, 50),
    parallel = 10,
    path = "./data/raw",
    bp = c(5, 100)
  )
}

# high quality reference
ohun_opt_detec <- run_energy_optimization(reference_best)

saveRDS(ohun_opt_detec, "./data/processed/high_quality_reference_optimization.RDS")


# exhaustive reference (all calls)
ohun_opt_detec_all <- run_energy_optimization(reference_all)

saveRDS(ohun_opt_detec_all, "./data/processed/all_calls_reference_optimization.RDS")


# automated detections used as reference
ohun_opt_detec_ericks <- run_energy_optimization(ericks_detection)

saveRDS(ohun_opt_detec_ericks, "./data/processed/ericks_reference_optimization.RDS")

ohun detection performance

Compare to exhaustive reference (only the highest sensitivity iteration is shown)

# load the saved optimization run against the exhaustive reference
ohun_opt_detec_all <- readRDS("./data/processed/all_calls_reference_optimization.RDS")

# tuning parameters and diagnostics to report; selected by name rather than by
# position so a change in column order cannot silently pick other columns
diagnostic_cols <- c(
  "threshold", "smooth", "hold.time", "min.duration", "max.duration",
  "thinning", "true.positives", "false.positives", "false.negatives",
  "split.positives", "merged.positives", "sensitivity", "specificity"
)

# keep only the iteration with the highest sensitivity
# (which.max() returns the first one when several tie)
best_iteration <- which.max(ohun_opt_detec_all$sensitivity)
ohun_opt_detec_all <- ohun_opt_detec_all[best_iteration, diagnostic_cols]

ohun_opt_detec_all

##      threshold smooth hold.time min.duration max.duration thinning
## 1025         1      1         1          0.1           50        1
##      true.positives false.positives false.negatives split.positives
## 1025            138              43              60               1
##      merged.positives sensitivity specificity
## 1025                3   0.6969697   0.7624309

# oa_DT <- datatable(ohun_opt_detec_all, editable = list(
#   target = 'row'
# ), rownames = FALSE, style = "bootstrap",  filter = 'top', options = list(
#   pageLength = 100, autoWidth = TRUE, dom = 'ft'
# ), autoHideNavigation = TRUE, escape = FALSE)
# 
# formatRound(table = oa_DT, columns = sapply(ohun_opt_detec_all, is.numeric), 3)

Optimized ohun detection got ~70% of all signals
24% of the detected signals were noise (no bat calls)

Compare to high quality reference (only the highest sensitivity iteration is shown)

# load the saved optimization run against the high quality reference
ohun_opt_detec <- readRDS("./data/processed/high_quality_reference_optimization.RDS")

# tuning parameters and diagnostics to report; selected by name rather than by
# position so a change in column order cannot silently pick other columns
diagnostic_cols <- c(
  "threshold", "smooth", "hold.time", "min.duration", "max.duration",
  "thinning", "true.positives", "false.positives", "false.negatives",
  "split.positives", "merged.positives", "sensitivity", "specificity"
)

# keep only the iteration with the highest sensitivity
# (which.max() returns the first one when several tie)
best_iteration <- which.max(ohun_opt_detec$sensitivity)
ohun_opt_detec <- ohun_opt_detec[best_iteration, diagnostic_cols]

ohun_opt_detec

##   threshold smooth hold.time min.duration max.duration thinning true.positives
## 1         2      1         1          0.1           40        1            105
##   false.positives false.negatives split.positives merged.positives sensitivity
## 1              48              36               0                0   0.7446809
##   specificity
## 1   0.6862745

# oa_DT <- datatable(ohun_opt_detec, editable = list(
#   target = 'row'
# ), rownames = FALSE, style = "bootstrap",  filter = 'top', options = list(
#   pageLength = 100, autoWidth = TRUE, dom = 'ft'
# ), autoHideNavigation = TRUE, escape = FALSE)
# 
# formatRound(table = oa_DT, columns = sapply(ohun_opt_detec, is.numeric), 3)

Optimized ohun detection got ~74% of high quality signals
31% of the detected signals were noise (no bat calls)

Compare to Erick’s detection (only the highest sensitivity iteration is shown)

# load the saved optimization run that used the automated detections as reference
ohun_opt_detec_ericks <- readRDS("./data/processed/ericks_reference_optimization.RDS")

# tuning parameters and diagnostics to report; selected by name rather than by
# position so a change in column order cannot silently pick other columns
diagnostic_cols <- c(
  "threshold", "smooth", "hold.time", "min.duration", "max.duration",
  "thinning", "true.positives", "false.positives", "false.negatives",
  "split.positives", "merged.positives", "sensitivity", "specificity"
)

# keep only the iteration with the highest sensitivity
# (which.max() returns the first one when several tie)
best_iteration <- which.max(ohun_opt_detec_ericks$sensitivity)
ohun_opt_detec_ericks <- ohun_opt_detec_ericks[best_iteration, diagnostic_cols]

ohun_opt_detec_ericks

##     threshold smooth hold.time min.duration max.duration thinning
## 514         2      1         1          0.1           40        1
##     true.positives false.positives false.negatives split.positives
## 514            115              51              45               1
##     merged.positives sensitivity specificity
## 514               10     0.71875   0.6927711

# oa_DT <- datatable(ohun_opt_detec_ericks, editable = list(
#   target = 'row'
# ), rownames = FALSE, style = "bootstrap",  filter = 'top', options = list(
#   pageLength = 100, autoWidth = TRUE, dom = 'ft'
# ), autoHideNavigation = TRUE, escape = FALSE)
# 
# formatRound(table = oa_DT, columns = sapply(ohun_opt_detec_ericks, is.numeric), 3)

Optimized ohun detection got ~71% of signals detected by Erick’s algorithm
31% of the detected signals were noise (no bat calls)

Filter using Random Forest

Before random forest filtering

# detect with the parameter combination that gave the highest sensitivity in
# the optimization against the exhaustive reference
ohun_detec <- energy_detector(
  threshold = 1,
  smooth = 1,
  hold.time = 1,
  min.duration = 0.1,
  max.duration = 50,
  parallel = 10,
  path = "./data/raw",
  bp = c(5, 100),
  pb = FALSE
)

# performance of the raw detection against the exhaustive reference
diagnose_detection(reference = reference_all, detection = ohun_detec)

##   true.positives false.positives false.negatives split.positives
## 1            138              43              60               1
##   merged.positives overlap.to.true.positives sensitivity specificity
## 1                3                 0.9351629   0.6969697   0.7624309

After random forest filtering

# label detections against the exhaustive reference; the `detection.class`
# column of the result is read below
ohun_detec <- label_detection(
  reference = reference_all,
  detection = ohun_detec,
  pb = FALSE
)

# measure spectrographic parameters
spectral_parameters <- spectro_analysis(
  ohun_detec,
  bp = c(5, 100),
  fast = TRUE,
  ovlp = 70,
  parallel = 10,
  pb = FALSE
)

# add the signal-to-noise ratio as an extra predictor
spectral_parameters$SNR <- sig2noise(ohun_detec, mar = 0.01, pb = FALSE)$SNR

# binary response: every label other than "false.positive" is treated as
# "true.positive"
detection_class <- ohun_detec$detection.class
detection_class[detection_class != "false.positive"] <- "true.positive"

# make it a factor for ranger to work
spectral_parameters$class <- as.factor(detection_class)

# run RF model on spectral parameters (identifier columns are left out)
keep_cols <- !names(spectral_parameters) %in% c("sound.files", "selec")

rfm <- ranger(
  class ~ .,
  data = spectral_parameters[, keep_cols],
  num.trees = 10000,
  importance = "impurity",
  seed = 10
)

# predicted class per detection as stored in the fitted model
# (ranger documents `$predictions` as out-of-bag predictions)
ohun_detec$pred.class <- rfm$predictions

# keep only the detections the model classifies as true positives
is_predicted_tp <- ohun_detec$pred.class == "true.positive"
rf_ohun_detec <- ohun_detec[is_predicted_tp, ]

diagnose_detection(reference = reference_all, detection = rf_ohun_detec, pb = FALSE)

##   true.positives false.positives false.negatives split.positives
## 1            134               6              64               1
##   merged.positives overlap.to.true.positives sensitivity specificity
## 1                2                 0.9347908   0.6767677   0.9571429

Random forest filtered ohun detection got ~68% of all signals
~4% of the detected signals were noise (no bat calls)

R session information

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_BR.UTF-8    
##  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_BR.UTF-8   
##  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ranger_0.13.1      DT_0.20            ohun_0.1.0         warbleR_1.1.27    
##  [5] NatureSounds_1.0.4 seewave_2.1.8      tuneR_1.3.3.1      remotes_2.4.2     
##  [9] knitr_1.36         viridis_0.6.2      viridisLite_0.4.0  pbapply_1.5-0     
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.1.1  xfun_0.28         bslib_0.2.5.1     purrr_0.3.4      
##  [5] lattice_0.20-44   colorspace_2.0-2  vctrs_0.3.8       generics_0.1.0   
##  [9] htmltools_0.5.2   yaml_2.2.1        utf8_1.2.2        rlang_0.4.12     
## [13] jquerylib_0.1.4   pillar_1.6.4      glue_1.5.1        DBI_1.1.1        
## [17] lifecycle_1.0.1   stringr_1.4.0     munsell_0.5.0     gtable_0.3.0     
## [21] htmlwidgets_1.5.4 evaluate_0.14     fastmap_1.1.0     fftw_1.0-6.1     
## [25] parallel_4.1.0    fansi_0.5.0       Rcpp_1.0.7        scales_1.1.1     
## [29] jsonlite_1.7.2    soundgen_2.1.0    gridExtra_2.3     rjson_0.2.20     
## [33] ggplot2_3.3.5     digest_0.6.29     stringi_1.7.6     dplyr_1.0.7      
## [37] dtw_1.22-3        grid_4.1.0        tools_4.1.0       bitops_1.0-7     
## [41] magrittr_2.0.1    sass_0.4.0        proxy_0.4-26      RCurl_1.98-1.5   
## [45] tibble_3.1.6      crayon_1.4.2      pkgconfig_2.0.3   Matrix_1.3-4     
## [49] ellipsis_0.3.2    MASS_7.3-54       shinyBS_0.61      assertthat_0.2.1 
## [53] rmarkdown_2.9     R6_2.5.1          signal_0.7-7      compiler_4.1.0

Ohun

Study cases

Marcelo Araya-Salas & Gloriana Chaverri

26-01-2022