split_sels <- read.csv("./data/processed/split_manual_annotations_5min.csv")
sels_22 <- split_sels[split_sels$sound.files == "T0000011-1.wav", ]
oed <- optimize_energy_detector(reference = sels_22, threshold = seq(0.015, 0.025, 0.005), min.duration = seq(0.001, 0.003, 0.001), ssmooth = c(5, 10, 15), hold.time = seq(0.08, 0.1, 0.01), path = .Options$warbleR$path, thinning = c(0.5, 1), parallel = 15, bp = c(20, 30), max.duration = 3)
# full_spectrograms(env, flim = c(20, 30), rows = 5, dest.path = "./data/processed/test_sound_files", suffix = "test_autodetec_best_parameters", width = 15, height = 8.5, fast.spec = TRUE)
# View(optim_ad[optim_ad$sensitivity == 1 & optim_ad$specificity == 1, ])
saveRDS(oed, "./data/processed/optimization_22kHz.RDS")
# test best
# ed <- energy_detector(files = unique(sels_22$sound.files), threshold = 0.015, min.duration = 0.002, ssmooth = 15, hold.time = 0.1, path = .Options$warbleR$path, thinning = 0.5, parallel = 12, bp = c(20, 30))
#
# diagnose_detection(sels_22, ed, parallel = 12)
# test best
# env_best <- autodetec(output = "list", pb = FALSE, para = 1, flist = unique(sels_22$sound.files), mindur = 0.001, threshold = 2, ssmooth = 6000, hold.time = 0.09, bp = c(20, 30), power = 1)
#
# diagnose_detection(sels_22, env_best$selection.table)
#
# env_best2 <- autodetec(output = "list", X = env, pb = FALSE, para = 1, flist = unique(sels_22$sound.files), mindur = 0.001, threshold = 2, ssmooth = 6000, hold.time = 0.09, bp = c(20, 30), power = 1)
#
# diagnose_detection(sels_22, env_best2$selection.table)
optim_ad <- readRDS("./data/processed/optimization_22kHz.RDS")
# print dynamic table
oa_DT <- datatable(optim_ad, editable = list(
target = 'row'
), rownames = FALSE, style = "bootstrap", filter = 'top', options = list(
pageLength = 100, autoWidth = TRUE, dom = 'ft'
), autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(optim_ad, is.numeric), 3)
split_sels <- read.csv("./data/processed/split_manual_annotations_5min.csv")
sels_22 <- split_sels[split_sels$peak.freq > 20 & split_sels$peak.freq < 30, ]
tab <- table(sels_22$sound.files)
sels_22 <- sels_22[sels_22$sound.files %in% names(tab)[tab > 20], ]
# best tuning parameters
oed4 <- optimize_energy_detector(reference = sels_22, files = unique(sels_22$sound.files), threshold = 0.02, min.duration = 0.002, ssmooth = c(13, 15, 17), hold.time = c(0.05, 0.08, 0.09, 0.1), path = .Options$warbleR$path, thinning = 0.5, parallel = 10, bp = c(20, 30), max.duration = c(3, 2.5, 5, 10, 10000), by.sound.file = TRUE, previous.output = oed4)
saveRDS(oed4, "./data/processed/optimization_22kHz_4_cuts.RDS")
optim_ad <- readRDS("./data/processed/optimization_22kHz_4_cuts.RDS")
# optim_ad <- optim_ad[optim_ad$sound.files == "T0000011-1.wav", ]
optim_ad <- summarize_diagnostic(optim_ad,time.diagnostics = TRUE)
oa_DT <- datatable(optim_ad, editable = list(
target = 'row'
), rownames = FALSE, style = "bootstrap", filter = 'top', options = list(
pageLength = 100, autoWidth = TRUE, dom = 'ft'
), autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(optim_ad, is.numeric), 3)
split_sels <- read.csv("./data/processed/split_manual_annotations_5min.csv")
sels_22 <- split_sels[split_sels$peak.freq > 20 & split_sels$peak.freq < 30, ]
tab <- table(sels_22$sound.files)
sels_22 <- sels_22[sels_22$sound.files %in% names(tab)[tab > 20], ]
# summary(sels_22$peak.freq)
length(unique(sels_22$sound.files))
# best tuning parameters
ed4 <- energy_detector(files = unique(sels_22$sound.files), threshold = 0.02, min.duration = 0.002, ssmooth = 17, hold.time = 0.025, path = .Options$warbleR$path, thinning = 0.5, parallel = 8, bp = c(20, 30), max.duration = 3)
# fed4
saveRDS(ed4, "./data/processed/optimal_detection_22kHz_4_cuts.RDS")
ed4 <- readRDS("./data/processed/optimal_detection_22kHz_4_cuts.RDS")
split_sels <- read.csv("./data/processed/split_manual_annotations_5min.csv")
sels_22 <- split_sels[split_sels$peak.freq > 20 & split_sels$peak.freq < 30, ]
tab <- table(sels_22$sound.files)
sels_22 <- sels_22[sels_22$sound.files %in% names(tab)[tab > 20], ]
led4 <- label_detection(reference = sels_22, detection = ed4, parallel = 10, pb = FALSE)
led4 <- filter_detection(led4, parallel = 10, pb = FALSE)
attributes(ed4)$call
## energy_detector(files = unique(sels_22$sound.files), path = .Options$warbleR$path,
## thinning = 0.5, bp = c(20, 30), ssmooth = 17, threshold = 0.02,
## hold.time = 0.025, min.duration = 0.002, max.duration = 3,
## parallel = 8)
optim_ad_bs <- diagnose_detection(reference = sels_22, detection = led4, by.sound.file = TRUE)
oa_DT <- datatable(optim_ad_bs, editable = list(
target = 'row'
), rownames = FALSE, style = "bootstrap", filter = 'top', options = list(
pageLength = 100, autoWidth = TRUE, dom = 'ft'
), autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(optim_ad_bs, is.numeric), 3)
optim_ad <- diagnose_detection(reference = sels_22, detection = led4)
oa_DT <- datatable(optim_ad, editable = list(
target = 'row'
), rownames = FALSE, style = "bootstrap", filter = 'top', options = list(
pageLength = 100, autoWidth = TRUE, dom = 'ft'
), autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(optim_ad, is.numeric), 3)
split_sels <- read.csv("./data/processed/split_manual_annotations_5min.csv")
# best tuning parameters
ed_all <- energy_detector(files = unique(split_sels$sound.files), threshold = 0.02, min.duration = 0.002, ssmooth = 17, hold.time = 0.025, path = .Options$warbleR$path, thinning = 0.5, parallel = 1, bp = c(20, 30), max.duration = 3)
saveRDS(ed_all, "./data/processed/optimal_detection_22kHz_all_cuts.RDS")
ed_all <- readRDS("./data/processed/optimal_detection_22kHz_all_cuts.RDS")
split_sels <- read.csv("./data/processed/split_manual_annotations_5min.csv")
sels_22 <- split_sels[split_sels$peak.freq > 20 & split_sels$peak.freq < 30, ]
label_ed_all <- label_detection(reference = sels_22, detection = ed_all, parallel = 10, pb = FALSE)
filter_ed_all <- filter_detection(label_ed_all, parallel = 10, pb = FALSE)
optim_ad_bs_all <- diagnose_detection(reference = sels_22, detection = filter_ed_all, by.sound.file = TRUE, pb = FALSE)
attributes(ed_all)$call
## energy_detector(files = unique(split_sels$sound.files), path = .Options$warbleR$path,
## thinning = 0.5, bp = c(20, 30), ssmooth = 17, threshold = 0.02,
## hold.time = 0.025, min.duration = 0.002, max.duration = 3,
## parallel = 1)
oa_DT <- datatable(optim_ad_bs_all, editable = list(
target = 'row'
), rownames = FALSE, style = "bootstrap", filter = 'top', options = list(
pageLength = 100, autoWidth = TRUE, dom = 'ft'
), autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(optim_ad_bs, is.numeric), 3)
optim_ad_all <- diagnose_detection(reference = sels_22, detection = ed_all, pb = FALSE)
oa_DT <- datatable(optim_ad_all, editable = list(
target = 'row'
), rownames = FALSE, style = "bootstrap", filter = 'top', options = list(
pageLength = 100, autoWidth = TRUE, dom = 'ft'
), autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(optim_ad, is.numeric), 3)
ed_all <- readRDS("./data/processed/optimal_detection_22kHz_all_cuts.RDS")
ed_all <- label_detection(reference = sels_22, detection = ed_all, parallel = 10, pb = FALSE)
ed_all <- filter_detection(ed_all, parallel = 10, pb = FALSE)
# measure spectrographic parameters
spectral_parameters <- spectro_analysis(ed_all, bp = c(20, 30), fast = TRUE, ovlp = 70, parallel = 10)
# mfccs <- mfcc_stats(X = lab_detec, bp = c(1, 3.5), ovlp = 70, parallel = 10)
# na_rows <- unique(unlist(sapply(mfccs, function(x) which(is.na(x)))))
# lab_detec <- lab_detec[-na_rows, ]
# spectral_parameters <- spectral_parameters[-na_rows, ]
# mfccs <- mfccs[-na_rows, ]
spectral_parameters$class <- ed_all$detection.class
# spectral_parameters <- data.frame(spectral_parameters, mfccs[, !names(spectral_parameters) %in% c("sound.files", "selec")])
spectral_parameters$class[spectral_parameters$class != "false.positive"] <- "true.positive"
# make it a factor for ranger to work
spectral_parameters$class <- as.factor(spectral_parameters$class)
# run RF model spectral and cepstral parameters
rfm <-
ranger(
class ~ .,
data = spectral_parameters[, !names(spectral_parameters) %in% c("sound.files", "selec")],
num.trees = 10000,
importance = "impurity",
seed = 10
)
saveRDS(list(rfm = rfm, spectral_parameters = spectral_parameters, detection = ed_all), "./data/processed/data_and_model_random_forest_22kHz_cuts.RDS")
attach(readRDS("./data/processed/data_and_model_random_forest_22kHz_cuts.RDS") )
rfm
## Ranger result
##
## Call:
## ranger(class ~ ., data = spectral_parameters[, !names(spectral_parameters) %in% c("sound.files", "selec")], num.trees = 10000, importance = "impurity", seed = 10)
##
## Type: Classification
## Number of trees: 10000
## Sample size: 23585
## Number of independent variables: 26
## Mtry: 5
## Target node size: 1
## Variable importance mode: impurity
## Splitrule: gini
## OOB prediction error: 0.09 %
Diagnostic after random forest classification:
# table(lab_detec$detection.class)
detection$pred.class <- rfm$predictions
positive_detec <- detection[detection$pred.class == "true.positive", ]
temp_detec <- positive_detec
temp_detec$detection.class <- "true.positive"
split_sels <- read.csv("./data/processed/split_manual_annotations_5min.csv")
sels_22 <- split_sels[split_sels$peak.freq > 20 & split_sels$peak.freq < 30, ]
diag <- diagnose_detection(reference = sels_22, detection = temp_detec, pb = FALSE)
# print dynamic table
oa_DT <- datatable(diag, editable = list(
target = 'row'
), rownames = FALSE, style = "bootstrap", filter = 'top', options = list(
pageLength = 100, autoWidth = TRUE, dom = 'ft'
), autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(diag, is.numeric), 3)
Black line = 1:1 gray line = model slope
obs_count <- tapply(sels_22$sound.files, sels_22$sound.files, length)
pred_count <- tapply(positive_detec$sound.files, positive_detec$sound.files, length)
sound_files <- unique(split_sels$sound.files)
# add those missing in predicted
names_pred_0 <- setdiff(c(names(obs_count),sound_files), names(pred_count))
pred_0 <- rep(0, length(names_pred_0))
names(pred_0) <- names_pred_0
pred_count <- c(pred_count, pred_0)
# add those missing in observed
names_obs_0 <- setdiff(c(names(pred_count), sound_files), names(obs_count))
obs_0 <- rep(0, length(names_obs_0))
names(obs_0) <- names_obs_0
obs_count <- c(obs_count, obs_0)
# order by name
pred_count <- pred_count[order(names(pred_count))]
obs_count <- obs_count[order(names(obs_count))]
# put both in a single data frame
df <- data.frame(sound.files = names(obs_count), observed = obs_count, predicted = pred_count)
# plot
ggplot(df, aes(x = observed, y = predicted)) +
geom_point(color = viridis(10, alpha = 0.4)[2], size = 3) +
geom_abline(slope = 1, intercept = 0) +
annotate("text", x = 50, y = 150, label = paste("r =", round(cor(obs_count, pred_count), 3)), size = 8) +
geom_smooth(method = "lm", se = FALSE, col = "gray") +
theme_classic(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'
# print best fit lm model
(lm(pred_count ~ obs_count))
##
## Call:
## lm(formula = pred_count ~ obs_count)
##
## Coefficients:
## (Intercept) obs_count
## -0.04722 0.92339
# split
split_obs <- split_sound_files(path = .Options$warbleR$path, X = sels_22, only.sels = TRUE, sgmt.dur = 60, pb = FALSE, parallel = 10)
split_pred <- split_sound_files(path = .Options$warbleR$path, X = as.data.frame(positive_detec), only.sels = TRUE, sgmt.dur = 60, pb = FALSE, parallel = 10)
obs_count <- tapply(split_obs$sound.files, split_obs$sound.files, length)
pred_count <- tapply(split_pred$sound.files, split_pred$sound.files, length)
sound_files <- unique(split_sels$sound.files)
# add those missing in predicted
names_pred_0 <- setdiff(c(names(obs_count),sound_files), names(pred_count))
pred_0 <- rep(0, length(names_pred_0))
names(pred_0) <- names_pred_0
pred_count <- c(pred_count, pred_0)
# add those missing in observed
names_obs_0 <- setdiff(c(names(pred_count), sound_files), names(obs_count))
obs_0 <- rep(0, length(names_obs_0))
names(obs_0) <- names_obs_0
obs_count <- c(obs_count, obs_0)
# order by name
pred_count <- pred_count[order(names(pred_count))]
obs_count <- obs_count[order(names(obs_count))]
# put both in a single data frame
df <- data.frame(sound.files = names(obs_count), observed = obs_count, predicted = pred_count)
df$sum <- df$observed + df$predicted
# plot
ggplot(df[df$sum > 0, ], aes(x = observed, y = predicted)) +
geom_point(color = viridis(10, alpha = 0.4)[2], size = 3) +
geom_abline(slope = 1, intercept = 0) +
annotate("text", x = 20, y = 50, label = paste("r =", round(cor(obs_count, pred_count), 3)), size = 8) +
geom_smooth(method = "lm", se = FALSE, col = "gray") +
theme_classic(base_size = 20)
# print best fit lm model
(lm(pred_count ~ obs_count))
Session information
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggplot2_3.3.5 ohun_0.1.0 DT_0.18 kableExtra_1.3.4
## [5] ranger_0.13.1 readxl_1.3.1 svMisc_1.1.4 rfigshare_0.3.7
## [9] viridis_0.6.1 viridisLite_0.4.0 Rraven_1.0.13 pbapply_1.4-3
## [13] bioacoustics_0.2.5 warbleR_1.1.27 NatureSounds_1.0.4 knitr_1.33
## [17] seewave_2.1.8 tuneR_1.3.3.1 devtools_2.4.2 usethis_2.0.1
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-152 bitops_1.0-7 fs_1.5.0 webshot_0.5.2
## [5] httr_1.4.2 rprojroot_2.0.2 tools_4.1.0 bslib_0.2.5.1
## [9] utf8_1.2.1 R6_2.5.0 mgcv_1.8-36 DBI_1.1.1
## [13] colorspace_2.0-2 withr_2.4.2 tidyselect_1.1.1 gridExtra_2.3
## [17] prettyunits_1.1.1 processx_3.5.2 moments_0.14 compiler_4.1.0
## [21] rvest_1.0.1 cli_3.0.1 xml2_1.3.2 desc_1.3.0
## [25] labeling_0.4.2 sass_0.4.0 scales_1.1.1 callr_3.7.0
## [29] proxy_0.4-26 dtw_1.22-3 systemfonts_1.0.2 stringr_1.4.0
## [33] digest_0.6.27 svglite_2.0.0 rmarkdown_2.10 pkgconfig_2.0.3
## [37] htmltools_0.5.2 sessioninfo_1.1.1 highr_0.9 fastmap_1.1.0
## [41] htmlwidgets_1.5.3 rlang_0.4.11 rstudioapi_0.13 farver_2.1.0
## [45] jquerylib_0.1.4 generics_0.1.0 jsonlite_1.7.2 crosstalk_1.1.1
## [49] dplyr_1.0.7 RCurl_1.98-1.4 magrittr_2.0.1 Matrix_1.3-4
## [53] Rcpp_1.0.7 munsell_0.5.0 fansi_0.5.0 lifecycle_1.0.0
## [57] stringi_1.7.4 yaml_2.2.1 MASS_7.3-54 RJSONIO_1.3-1.4
## [61] pkgbuild_1.2.0 plyr_1.8.6 grid_4.1.0 promises_1.2.0.1
## [65] crayon_1.4.1 lattice_0.20-44 splines_4.1.0 ps_1.6.0
## [69] pillar_1.6.1 rjson_0.2.20 fftw_1.0-6 pkgload_1.2.1
## [73] XML_3.99-0.6 glue_1.4.2 evaluate_0.14 remotes_2.4.0
## [77] vctrs_0.3.8 httpuv_1.6.1 testthat_3.0.4 cellranger_1.1.0
## [81] gtable_0.3.0 purrr_0.3.4 assertthat_0.2.1 cachem_1.0.5
## [85] xfun_0.25 later_1.2.0 signal_0.7-7 tibble_3.1.2
## [89] memoise_2.0.0 ellipsis_0.3.2