Load packages

## add 'developer/' to packages to be installed from github
x <- c("devtools", "maRce10/warbleR", "bioacoustics", "pbapply", "Rraven", "parallel", "viridis", "ggplot2", "knitr", "kableExtra", "maRce10/ohun", "DT", "formatR", "ranger")

aa <- lapply(x, function(y) {
  
  # get pakage name
  pkg <- strsplit(y, "/")[[1]]
  pkg <- pkg[length(pkg)]
  
  # check if installed, if not then install 
  if (!pkg %in% installed.packages()[,"Package"])  {

      if (grepl("/", y))  devtools::install_github(y, force = TRUE) else
    install.packages(y) 
    }

  # load package
  try(require(pkg, character.only = T), silent = T)
})

opts_knit$set(root.dir = "..")

Downsample

iw <- info_wavs()

table(iw$sample.rate)

# fix_wavs(samp.rate = 10)

Create stratified 5 minute cuts

# path with waves wav_path <- '/Volumes/CHIRINO/Grabaciones SM4 2019/'

wav_path <- "/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/10_kHz_5_min_cuts/converted_sound_files/"

# list of wave files
wavs <- list.files(path = wav_path, pattern = "\\.wav$", ignore.case = TRUE, recursive = TRUE,
    full.names = TRUE)

# data frame with waves and metadata
wav_df <- data.frame(path = dirname(wavs), file = basename(wavs))
wav_df$site <- ifelse(grepl("sukia", wav_df$file, ignore.case = TRUE), "Sukia", "Chimu")
sm_metadata <- seewave::songmeter(wav_df$file)

wav_df <- cbind(wav_df, sm_metadata)

head(wav_df)

str(wav_df)

par(mfrow = c(2, 1))
hist(wav_df$time[wav_df$site == "Sukia"], breaks = 100, col = adjustcolor("green",
    alpha.f = 0.4), main = "Sukia")
hist(wav_df$time[wav_df$site == "Chimu"], breaks = 100, col = adjustcolor("blue",
    alpha.f = 0.4), main = "Chimu")

wav_df <- wav_df[order(wav_df$time), ]

wav_df$day_count <- as.vector(round((wav_df$time - min(wav_df$time))/(3600 * 24),
    0) + 1)

wav_df$week <- cut(x = wav_df$day_count, breaks = seq(1, max(wav_df$day_count) +
    7, by = 7), include.lowest = TRUE)

wav_df <- droplevels(wav_df)

wav_df$period <- 4
wav_df$period[wav_df$hour %in% 17:19] <- 1
wav_df$period[wav_df$hour %in% 20:22] <- 2
wav_df$period[wav_df$hour %in% c(23, 0, 1)] <- 3

# create name combination
wav_df$site.week.period <- paste(wav_df$site, wav_df$week, wav_df$period, sep = "-")

# remove LAguna chimu
wav_df <- wav_df[grep("LAguna Chimu", wav_df$path, invert = TRUE), ]

set.seed(1)
sample_wav_l <- lapply(unique(wav_df$site.week.period), function(x) {

    X <- wav_df[wav_df$site.week.period == x, ]

    if (nrow(X) > 2)
        X <- X[sample(1:nrow(X), size = 2), ]

    return(X)

})

sample_wav_df <- do.call(rbind, sample_wav_l)

write.csv(sample_wav_df, file.path("/Volumes/CHIRINO/Proyecto Lemur/INV_2021/5_min_cut_lemur/",
    "5_min_cut_metadata.csv"), row.names = FALSE)

sample_wav_df$cut.name <- gsub(".wav", "_5min_cut.wav", sample_wav_df$file)

library(pbapply)

out <- pblapply(1:nrow(sample_wav_df), function(x) {

    # read first 5 mins
    wv <- try(readWave(file.path(sample_wav_df$path[x], sample_wav_df$file[x]), from = 0,
        to = 300, units = "seconds"), silent = TRUE)

    if (class(wv) == "Wave")
        # save cut
    writeWave(wv, filename = file.path("/Volumes/CHIRINO/Proyecto Lemur/INV_2021/5_min_cut_lemur/",
        sample_wav_df$cut.name[x]))

    if (class(wv) == "Wave")
        return(NULL) else return(x)
})

Cheking manual selections

# list all selection tables
sl_tbs <- list.files("~/Dropbox/estudiantes/Fabiola/proyecto_lemur/data/processed/selection_tables/all_selec_tables_lemur/")

# remove empty selection table
sl_tbs <- sl_tbs[sl_tbs != "LAGCHIMU_20200919_201500.Table.2.selections.txt"]

# import all selections
sls <- imp_raven(path = "~/Dropbox/estudiantes/Fabiola/proyecto_lemur/data/processed/selection_tables/all_selec_tables_lemur/",
    warbler.format = TRUE, all.data = TRUE, files = sl_tbs)

# relabel selec column
sls$selec <- 1:nrow(sls)

# check duration
sls$duration <- sls$end - sls$start
unique(sls$sound.files[sls$duration > 1])

# remove temporarily but must be fixed on the original individual selection
# tables
sls <- sls[sls$duration < 1, ]

# number of sound files and selection files should be the same
length(unique(sls$sound.files)) == length(unique(sls$selec.file))

# find files with more than 1 selection tab <-
# table(sls$sound.files[!duplicated(sls$selec.file)]) tab[tab > 1]
# unique(sls$selec.file[sls$sound.files ==
# 'LAGCHIMU_20190709_200000_5min_cut.wav'])

# check selections
cs <- check_sels(X = sls, pb = FALSE)
table(cs$check.res[cs$check.res != "OK"])

# those with errors were empty selections (single points)
sls <- sls[cs$check.res == "OK", ]

# check for duplicates (if any sound files was anottated more than once)
sls <- overlapping_sels(sls, indx.row = TRUE, pb = FALSE)
sls <- sls[!duplicated(sls$indx.row, incomparables = NA), ]

# should be 4 in 'SUKIA_20200207_220000_5min_cut.wav'
table(sls$sound.files[!is.na(sls$indx.row)])

# export raven multiple sound file selection table
exp_raven(sls, path = "./data/processed/selection_tables/", sound.file.path = .Options$warbleR$path,
    file.name = "temporary_doublechecking_all_selections")


# fix path for Fabis laptop
fix_path(path = "./data/processed/selection_tables/", new.begin.path = "/Users/fabiolachirino/Dropbox/Fabiola/proyecto_lemur/data/raw/sound_files/10_kHz_5_min_cuts/",
    sound.file.col = "Begin File")


# create sound file labels
sf.abbr <- gsub("0000_5min_cut.wav", "", sls$sound.files)
sf.abbr <- gsub("SUKIA_20", "S", sf.abbr)
sls$sf.abbr <- gsub("LAGCHIMU_20", "L", sf.abbr)

traing_dat <- imp_raven(path = "./data/processed/selection_tables/", files = "temporary_doublechecking_all_selections.txt")

train_files <- unique(traing_dat$`Begin File`)

fc <- file.copy(from = file.path("/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/10_kHz_5_min_cuts/",
    train_files), to = file.path("/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/training_sound_files/",
    train_files))

sls$tags <- "lemur"
exp_raven(sls[, c("sound.files", "selec", "start", "end", "bottom.freq", "top.freq",
    "tags")], path = "/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/training_sound_files/",
    sound.file.path = .Options$warbleR$path, file.name = "annotations_raven_format")

fix_path(path = "/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/training_sound_files/",
    new.begin.path = "", sound.file.col = "Begin File")
fix_path(path = "/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/training_sound_files/",
    new.begin.path = "", sound.file.col = "Begin Path")

Create catalogs

# create catalogs
catalog(X = sls, flim = c(1, 3.5), nrow = 12, ncol = 15, ovlp = 70, height = 15,
    width = 23, same.time.scale = TRUE, mar = 0.005, wl = 512, gr = FALSE, spec.mar = 0.4,
    lab.mar = 0.8, rm.axes = TRUE, by.row = TRUE, box = TRUE, labels = c("sf.abbr",
        "selec"), fast.spec = TRUE, pal = viridis, parallel = 10)

# move images to figures folder
move_imgs(from = .Options$warbleR$path, to = "./figures", overwrite = TRUE)


# catalog for paper
catalog(X = sls[1:180, ], flim = c(1, 3.5), nrow = 12, ncol = 15, ovlp = 90, height = 15,
    width = 23, same.time.scale = TRUE, mar = 0.1, wl = 512, gr = FALSE, spec.mar = 0,
    lab.mar = 1e-04, rm.axes = TRUE, by.row = TRUE, box = FALSE, fast.spec = FALSE,
    pal = viridis, parallel = 1, img.suffix = "no_margins", collevels = seq(-115,
        0, 5))

# move images to figures folder
move_imgs(from = .Options$warbleR$path, to = "./figures", overwrite = TRUE)

Measure frequency range parameters

To figure out variation in signal structure aiming to define templates

# measure spectrographic parameters
spectral_parameters <- spectro_analysis(sls, bp = "frange", fast = TRUE, ovlp = 70,
    pb = FALSE)

saveRDS(spectral_parameters, "./data/processed/acoustic_parameters_and_selections.RDS")

spectral_parameters <- readRDS("./data/processed/acoustic_parameters_and_selections.RDS")

sls <- imp_raven(path = "./data/processed/selection_tables/", files = "temporary_doublechecking_all_selections.txt",
    pb = FALSE, warbler.format = TRUE)

# measure signal_2_noise ratio sls <- signal_2_noise(sls, mar = 0.1, before =
# TRUE, pb = FALSE, bp = 'frange')
pca <- prcomp(spectral_parameters[, 2:27], scale. = TRUE)


# extrac SNR and peak freq
sls$meanpeakf <- spectral_parameters$meanpeakf
sls$duration <- spectral_parameters$duration
sls$sp.ent <- spectral_parameters$sp.ent
sls$PC1 <- pca$x[, 1]


# stck_parameters$PC1
stck_parameters <- stack(sls[, c("duration", "meanpeakf", "sp.ent", "PC1")])

ggplot(stck_parameters, aes(x = values)) + geom_histogram(fill = viridis(10, alpha = 0.9)[2]) +
    facet_wrap(~ind, scales = "free")

spectral_parameters <- spectral_parameters[!is.infinite(spectral_parameters$SNR),
    ]

freq_variation <- sapply(c("duration", "meanpeakf", "sp.ent", "PC1"), function(x) data.frame(mean = mean(sls[,
    x]), min = min(sls[, x]), max = max(sls[, x]), sd = sd(sls[, x]), CI.2.5 = quantile(sls[,
    x], 0.025), CI.97.5 = quantile(sls[, x], 0.975)))

freq_variation <- do.call(rbind, lapply(data.frame(freq_variation), function(x) round(unlist(x),
    3)))

# print table as kable
kb <- kable(freq_variation, row.names = TRUE, digits = 3)

kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))

print(kb)

	mean	min	max	sd	CI.2.5	CI.97.5
duration	0.067	0.012	0.972	0.037	0.029	0.139
meanpeakf	2.337	1.199	2.956	0.109	2.145	2.517
sp.ent	0.873	0.623	0.993	0.055	0.756	0.960
PC1	0.000	-21.798	11.555	3.066	-5.660	4.920

Template-based detection

# find template close to mean peak freq
mean_peak_indx <- which.min(abs(sls$meanpeakf - freq_variation[rownames(freq_variation) ==
    "meanpeakf", 1]))

# find template close to mean duration
mean_duration_indx <- which.min(abs(sls$duration - freq_variation[rownames(freq_variation) ==
    "duration", 1]))

# find template close to mean PC1
mean_pc1_indx <- which.min(abs(sls$PC1 - freq_variation[rownames(freq_variation) ==
    "PC1", 1]))

# put both templates together
templates <- sls[c(mean_peak_indx, mean_duration_indx, mean_pc1_indx), ]

# label templates
templates$template.type <- c("mean.peak.freq", "mean.duration", "mean.PC1")

# create ext. selection table
templates_est <- selection_table(templates, extended = TRUE, confirm.extended = FALSE,
    fix.selec = TRUE)

templates_est <- rename_est_waves(templates_est, new.sound.files = templates$template.type)

catalog(templates_est, nrow = 2, ncol = 2, ovlp = 70, height = 10, width = 14, same.time.scale = TRUE,
    mar = 1, wl = 512, gr = FALSE, spec.mar = 0.6, lab.mar = 1, rm.axes = FALSE,
    by.row = TRUE, box = TRUE, labels = "sound.files", fast.spec = FALSE, pal = viridis,
    flim = c(1, 3.5), img.suffix = "templates", collevels = seq(-100, 0, 5))

te2 <- te <- templates_est[rep(2:4, 2), ]
# te2 <- rename_est_waves(te2, new.sound.files = letters[1:3])
te2$selec <- 2
te3 <- rbind(te, te2)

catalog(templates_est, nrow = 2, ncol = 3, ovlp = 90, height = 10, width = 14, same.time.scale = TRUE,
    mar = 1, wl = 512, gr = FALSE, spec.mar = 0.6, lab.mar = 2, rm.axes = FALSE,
    by.row = TRUE, box = FALSE, labels = "sound.files", fast.spec = FALSE, pal = viridis,
    flim = c(1, 3.5), img.suffix = "templates", collevels = seq(-100, 0, 5), )

# # move images to figures folder
move_imgs(from = .Options$warbleR$path, to = "./figures", overwrite = TRUE)

# save est
saveRDS(templates_est, "./data/processed/mean_acoustic_parameter_templates_est.RDS")

Detection perfomance

Using 4 templates:

mean_peakf: the call with the peak frequency closest to the mean of the population

templates_est <- readRDS("./data/processed/mean_acoustic_parameter_templates_est.RDS")

paral <- 15

# use mean peak freq sels as template, fourier spectrograms
corr_templ <- template_correlator(templates = templates_est, files = unique(sls$sound.files),
    path = .Options$warbleR$path, parallel = paral, hop.size = 11.6, ovlp = 70)

saveRDS(corr_templ, "./data/processed/template_correlations_4_templates.RDS")

corr_templ <- readRDS("./data/processed/template_correlations_4_templates.RDS")

optimize_fourier_detec <- optimize_template_detector(reference = sls, template.correlations = corr_templ,
    threshold = seq(0.05, 0.5, 0.01), parallel = 1, by.sound.file = TRUE, pb = TRUE)

optimize_fourier_detec$recall <- optimize_fourier_detec$sensitivity
optimize_fourier_detec$precision <- optimize_fourier_detec$specificity
optimize_fourier_detec$sensitivity <- optimize_fourier_detec$specificity <- NULL
optimize_fourier_detec$f1.score <- optimize_fourier_detec$recall * optimize_fourier_detec$precision

saveRDS(optimize_fourier_detec, "./data/processed/optimization_results_4_templates.RDS")

optimize_fourier_detec <- readRDS("./data/processed/optimization_results_4_templates.RDS")

optimize_fourier_detec$templates <- gsub("-1", "", optimize_fourier_detec$templates)

optimize_fourier_detec$overlap.to.true.positives <- optimize_fourier_detec$proportional.overlap.true.positives

sd_tab <- summarize_diagnostic(optimize_fourier_detec)

saveRDS(sd_tab, "./data/processed/summary_optimization_results_4_templates.RDS")

sd_tab <- readRDS("./data/processed/optimization_results_4_templates.RDS")

sd_tab <- sd_tab[grep("SNR", sd_tab$templates, invert = TRUE), ]

# sd_tab$f1.score <- sd_tab$rec * sd_tab$specificity

agg_sd <- aggregate(cbind(recall, precision, f1.score) ~ threshold + templates, data = sd_tab,
    mean)

ggplot(agg_sd, aes(x = threshold, y = recall, group = templates, color = templates)) +
    geom_line() + geom_point() + scale_color_viridis_d(end = 1) + theme_classic()

ggplot(agg_sd, aes(x = threshold, y = precision, group = templates, color = templates)) +
    geom_line() + geom_point() + scale_color_viridis_d(end = 0.8) + theme_classic()

ggplot(agg_sd, aes(y = precision, x = recall, group = templates, color = templates)) +
    geom_line() + geom_point() + scale_color_viridis_d(end = 1) + theme_classic()

ggplot(agg_sd, aes(x = threshold, y = f1.score, group = templates, color = templates)) +
    geom_line() + geom_point() + scale_color_viridis_d(end = 0.8) + theme_classic()

stck_agg_sd <- cbind(agg_sd[, 1:2], stack(agg_sd[, 3:5]))

stck_agg_sd$ind <- factor(stck_agg_sd$ind, labels = c("Recall", "Precision", "F1 score"))

# ggf1 <-
ggplot(stck_agg_sd, aes(x = threshold, y = values, group = templates, color = templates)) +
    geom_line() + geom_point() + labs(x = "Correlation threshold", color = "Template",
    y = "") + facet_wrap(~ind, nrow = 1, scales = "free_y") + scale_color_viridis_d(end = 0.8,
    labels = c("Mean duration", "Mean PC1", "Mean peak frequency"), alpha = 0.5) +
    theme_classic()

ggsave("./data/processed/multipanel_perfomance_indices.jpeg", width = 9, height = 4,
    dpi = 300)

# print dynamic table
oa_DT <- datatable(sd_tab, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
    filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
    autoHideNavigation = TRUE, escape = FALSE)

formatRound(table = oa_DT, columns = sapply(sd_tab, is.numeric), 3)

Template fourier mean.PC1 threshold 0.43

Choose ‘fourier mean.PC1’ template with a threshold of 0.43 due to a good sensitivity and relatively low number of false positives

templates_est <- readRDS("./data/processed/mean_acoustic_parameter_templates_est.RDS")

corr_templ <- template_correlator(templates = templates_est[templates_est$sound.files ==
    "mean.PC1", , drop = FALSE], files = unique(sls$sound.files), path = .Options$warbleR$path,
    parallel = 10, hop.size = 11.6, ovlp = 70)

detec <- template_detector(template.correlations = corr_templ, threshold = 0.43,
    parallel = 10, pb = TRUE)

diagnostic <- diagnose_detection(reference = sls, detection = detec, parallel = 10,
    by.sound.file = TRUE)

summarize_diagnostic(diagnostic)

saveRDS(diagnostic, "./data/processed/diagnostic_mean_pc1_0.43_threshold_diagnostic.RDS")

lab_detec <- label_detection(reference = sls, detection = detec, parallel = 10)

saveRDS(lab_detec, "./data/processed/label_detection_mean_pc1_0.43_threshold.RDS")

lab_detec <- readRDS("./data/processed/label_detection_mean_pc1_0.43_threshold.RDS")

filter_lab_detec <- filter_detection(detection = lab_detec, by = "scores")

saveRDS(filter_lab_detec, "./data/processed/filtered_detection_mean_pc1_0.43_threshold.RDS")

diagnose_model <- diagnose_detection(reference = sls, filter_lab_detec, parallel = 10)


diagnose_model$f1.score <- diagnose_model$specificity * diagnose_model$sensitivity

saveRDS(diagnose_model, "./data/processed/diagnostics_detection_mean_pc1_0.43_threshold.RDS")

diag <- readRDS("./data/processed/diagnostics_detection_mean_pc1_0.43_threshold.RDS")

# print dynamic table
oa_DT <- datatable(diag, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
    filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
    autoHideNavigation = TRUE, escape = FALSE)

formatRound(table = oa_DT, columns = sapply(diag, is.numeric), 3)

Random forest results

filter_lab_detec <- readRDS("./data/processed/filtered_detection_mean_pc1_0.43_threshold.RDS")

# measure spectrographic parameters
spectral_parameters <- spectro_analysis(filter_lab_detec, bp = c(1, 3.5), fast = TRUE,
    ovlp = 70, parallel = 10)

# mfccs <- mfcc_stats(X = lab_detec, bp = c(1, 3.5), ovlp = 70, parallel = 10)
# na_rows <- unique(unlist(sapply(mfccs, function(x) which(is.na(x)))))
# lab_detec <- lab_detec[-na_rows, ] spectral_parameters <-
# spectral_parameters[-na_rows, ] mfccs <- mfccs[-na_rows, ]

spectral_parameters$class <- filter_lab_detec$detection.class

# spectral_parameters <- data.frame(spectral_parameters, mfccs[,
# !names(spectral_parameters) %in% c('sound.files', 'selec')])

spectral_parameters$class[spectral_parameters$class != "false.positive"] <- "true.positive"

# make it a factor for ranger to work
spectral_parameters$class <- as.factor(spectral_parameters$class)

# run RF model spectral and cepstral parameters
rfm <- ranger(class ~ ., data = spectral_parameters[, !names(spectral_parameters) %in%
    c("sound.files", "selec")], num.trees = 10000, importance = "impurity", seed = 10)

saveRDS(list(rfm = rfm, spectral_parameters = spectral_parameters, filter_lab_detec = filter_lab_detec),
    "./data/processed/data_and_model_random_forest_0.43_threshold_only_spectro_parameters.RDS")

# attach(readRDS('./data/processed/data_and_model_random_forest_0.43_threshold.RDS'))


attach(readRDS("./data/processed/data_and_model_random_forest_0.43_threshold_only_spectro_parameters.RDS"))

rfm

## Ranger result
## 
## Call:
##  ranger(class ~ ., data = spectral_parameters[, !names(spectral_parameters) %in%      c("sound.files", "selec")], num.trees = 10000, importance = "impurity",      seed = 10) 
## 
## Type:                             Classification 
## Number of trees:                  10000 
## Sample size:                      89803 
## Number of independent variables:  26 
## Mtry:                             5 
## Target node size:                 1 
## Variable importance mode:         impurity 
## Splitrule:                        gini 
## OOB prediction error:             0.88 %

Diagnostic after random forest classification:

filter_lab_detec$pred.class <- rfm$predictions

positive_detec <- filter_lab_detec[filter_lab_detec$pred.class == "true.positive",
    ]



# save for trying stats write.csv(positive_detec,
# './data/processed/true_positive_detections_on_training_recordings.csv',
# row.names = FALSE)

# exp_raven(positive_detec, file.name =
# './data/processed/true_positive_detections_on_training_recordings',
# sound.file.path = .Options$warbleR$path)

# fix path for Fabis laptop fix_path(path = './data/processed/',new.begin.path
# =
# '/Users/fabiolachirino/Dropbox/Fabiola/proyecto_lemur/data/raw/sound_files/10_kHz_5_min_cuts/',
# sound.file.col = 'Begin File')

temp_detec <- positive_detec
temp_detec$detection.class <- "true.positive"

diag <- diagnose_detection(reference = sls, detection = temp_detec, pb = FALSE)

# print dynamic table
oa_DT <- datatable(diag, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
    filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
    autoHideNavigation = TRUE, escape = FALSE)

formatRound(table = oa_DT, columns = sapply(diag, is.numeric), 3)

Black line = 1:1 gray line = model slope

obs_count <- tapply(sls$sound.files, sls$sound.files, length)
pred_count <- tapply(positive_detec$sound.files, positive_detec$sound.files, length)

int_columns <- intersect(names(obs_count), names(pred_count))
obs_count <- obs_count[names(obs_count) %in% int_columns]
pred_count <- pred_count[names(pred_count) %in% int_columns]
pred_count <- pred_count[order(names(pred_count))]
obs_count <- obs_count[order(names(obs_count))]

df <- data.frame(sound.files = names(obs_count), observed = obs_count, predicted = pred_count)

ggplot(df, aes(x = observed, y = predicted)) + geom_point(color = viridis(10, alpha = 0.4)[2],
    size = 3) + geom_abline(slope = 1, intercept = 0) + annotate("text", x = 50,
    y = 150, label = paste("r =", round(cor(obs_count, pred_count), 3)), size = 8) +
    geom_smooth(method = "lm", se = FALSE, col = "gray") + theme_classic(base_size = 20)

(lm(pred_count ~ obs_count))

## 
## Call:
## lm(formula = pred_count ~ obs_count)
## 
## Coefficients:
## (Intercept)    obs_count  
##      -0.970        0.822

Removing outlier

ggplot(df[df$observed < 100, ], aes(x = observed, y = predicted)) + geom_point(color = viridis(10,
    alpha = 0.4)[2], size = 3) + geom_abline(slope = 1, intercept = 0) + annotate("text",
    x = 50, y = 150, label = paste("r =", round(cor(obs_count[obs_count < 100], pred_count[obs_count <
        100]), 3)), size = 8) + geom_smooth(method = "lm", se = FALSE, col = "gray") +
    theme_classic(base_size = 20)

(lm(pred_count[obs_count < 100] ~ obs_count[obs_count < 100]))

## 
## Call:
## lm(formula = pred_count[obs_count < 100] ~ obs_count[obs_count < 
##     100])
## 
## Coefficients:
##                (Intercept)  obs_count[obs_count < 100]  
##                     0.0145                      0.7613

Template fourier mean.PC1 threshold 0.38

Choose ‘fourier mean.PC1’ template with a threshold of 0.43 due to a good sensitivity and relatively low number of false positives

templates_est <- readRDS("./data/processed/mean_acoustic_parameter_templates_est.RDS")

corr_templ <- template_correlator(templates = templates_est[templates_est$sound.files ==
    "mean.PC1", , drop = FALSE], files = unique(sls$sound.files), path = .Options$warbleR$path,
    parallel = 10, hop.size = 11.6, ovlp = 70)

detec <- template_detector(template.correlations = corr_templ, threshold = 0.38,
    parallel = 10, pb = TRUE)

diagnostic <- diagnose_detection(reference = sls, detection = detec, parallel = 10,
    by.sound.file = TRUE)

summarize_diagnostic(diagnostic)

saveRDS(diagnostic, "./data/processed/diagnostic_mean_pc1_0.38_threshold_diagnostic.RDS")

lab_detec <- label_detection(reference = sls, detection = detec, parallel = 10)

saveRDS(lab_detec, "./data/processed/label_detection_mean_pc1_0.38_threshold.RDS")

lab_detec <- readRDS("./data/processed/label_detection_mean_pc1_0.38_threshold.RDS")

filter_lab_detec <- filter_detection(lab_detec, parallel = 10)

saveRDS(filter_lab_detec, "./data/processed/filtered_detection_mean_pc1_0.38_threshold.RDS")

diagnose_model <- diagnose_detection(reference = sls, filter_lab_detec, parallel = 10)

saveRDS(diagnose_model, "./data/processed/diagnostics_detection_mean_pc1_0.38_threshold.RDS")

diag <- readRDS("./data/processed/diagnostics_detection_mean_pc1_0.38_threshold.RDS")

# print dynamic table
oa_DT <- datatable(diag, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
    filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
    autoHideNavigation = TRUE, escape = FALSE)

formatRound(table = oa_DT, columns = sapply(diag, is.numeric), 3)

Random forest results

lab_detec <- readRDS("./data/processed/label_detection_mean_pc1_0.38_threshold.RDS")

# measure spectrographic parameters
spectral_parameters <- spectro_analysis(lab_detec, bp = c(1, 3.5), fast = TRUE, ovlp = 70,
    parallel = 10)

# mfccs <- mfcc_stats(X = lab_detec, bp = c(1, 3.5), ovlp = 70, parallel = 10)
# na_rows <- unique(unlist(sapply(mfccs, function(x) which(is.na(x)))))
# lab_detec <- lab_detec[-na_rows, ] spectral_parameters <-
# spectral_parameters[-na_rows, ] mfccs <- mfccs[-na_rows, ]

spectral_parameters$class <- lab_detec$detection.class

# spectral_parameters <- data.frame(spectral_parameters, mfccs[,
# !names(spectral_parameters) %in% c('sound.files', 'selec')])

spectral_parameters$class[spectral_parameters$class != "false.positive"] <- "true.positive"

# make it a factor for ranger to work
spectral_parameters$class <- as.factor(spectral_parameters$class)

# run RF model spectral and cepstral parameters
rfm <- ranger(class ~ ., data = spectral_parameters[, !names(spectral_parameters) %in%
    c("sound.files", "selec")], num.trees = 10000, importance = "impurity", seed = 10)

saveRDS(list(rfm = rfm, spectral_parameters = spectral_parameters, lab_detec_0.38 = lab_detec),
    "./data/processed/data_and_model_random_forest_0.38_threshold_only_spectro_parameters.RDS")

attach(readRDS("./data/processed/data_and_model_random_forest_0.38_threshold_only_spectro_parameters.RDS"))

rfm

## Ranger result
## 
## Call:
##  ranger(class ~ ., data = spectral_parameters[, !names(spectral_parameters) %in%      c("sound.files", "selec")], num.trees = 10000, importance = "impurity",      seed = 10) 
## 
## Type:                             Classification 
## Number of trees:                  10000 
## Sample size:                      215415 
## Number of independent variables:  26 
## Mtry:                             5 
## Target node size:                 1 
## Variable importance mode:         impurity 
## Splitrule:                        gini 
## OOB prediction error:             1.08 %

Diagnostic after random forest classification:

lab_detec <- lab_detec_0.38
# table(lab_detec$detection.class)
lab_detec$pred.class <- rfm$predictions

positive_detec <- lab_detec[lab_detec$pred.class == "true.positive", ]


temp_detec <- positive_detec
temp_detec$detection.class <- "true.positive"

diag <- diagnose_detection(reference = sls, detection = temp_detec, pb = FALSE)

# print dynamic table
oa_DT <- datatable(diag, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
    filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
    autoHideNavigation = TRUE, escape = FALSE)

formatRound(table = oa_DT, columns = sapply(diag, is.numeric), 3)

Black line = 1:1 gray line = model slope

obs_count <- tapply(sls$sound.files, sls$sound.files, length)
pred_count <- tapply(positive_detec$sound.files, positive_detec$sound.files, length)

int_columns <- intersect(names(obs_count), names(pred_count))
obs_count <- obs_count[names(obs_count) %in% int_columns]
pred_count <- pred_count[names(pred_count) %in% int_columns]
pred_count <- pred_count[order(names(pred_count))]
obs_count <- obs_count[order(names(obs_count))]

df <- data.frame(sound.files = names(obs_count), observed = obs_count, predicted = pred_count)

ggplot(df, aes(x = observed, y = predicted)) + geom_point(color = viridis(10, alpha = 0.4)[2],
    size = 3) + geom_abline(slope = 1, intercept = 0) + annotate("text", x = 50,
    y = 150, label = paste("r =", round(cor(obs_count, pred_count), 3)), size = 8) +
    geom_smooth(method = "lm", se = FALSE, col = "gray") + theme_classic(base_size = 20)

(lm(pred_count ~ obs_count))

## 
## Call:
## lm(formula = pred_count ~ obs_count)
## 
## Coefficients:
## (Intercept)    obs_count  
##       -3.17         1.45

Removing outlier

ggplot(df[df$observed < 100, ], aes(x = observed, y = predicted)) + geom_point(color = viridis(10,
    alpha = 0.4)[2], size = 3) + geom_abline(slope = 1, intercept = 0) + annotate("text",
    x = 50, y = 150, label = paste("r =", round(cor(obs_count[obs_count < 100], pred_count[obs_count <
        100]), 3)), size = 8) + geom_smooth(method = "lm", se = FALSE, col = "gray") +
    theme_classic(base_size = 20)

(lm(pred_count[obs_count < 100] ~ obs_count[obs_count < 100]))

## 
## Call:
## lm(formula = pred_count[obs_count < 100] ~ obs_count[obs_count < 
##     100])
## 
## Coefficients:
##                (Intercept)  obs_count[obs_count < 100]  
##                     -0.801                       1.304

Session information

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_BR.UTF-8    
##  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_BR.UTF-8   
##  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ranger_0.13.1      formatR_1.11       DT_0.26            ohun_0.1.0        
##  [5] kableExtra_1.3.4   ggplot2_3.4.0      viridis_0.6.2      viridisLite_0.4.1 
##  [9] Rraven_1.0.13      pbapply_1.6-0      bioacoustics_0.2.8 warbleR_1.1.28    
## [13] NatureSounds_1.0.4 knitr_1.41         seewave_2.2.0      tuneR_1.4.1       
## [17] devtools_2.4.2     usethis_2.0.1     
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-152      bitops_1.0-7      fs_1.5.2          webshot_0.5.4    
##  [5] httr_1.4.4        rprojroot_2.0.3   Deriv_4.1.3       tools_4.1.0      
##  [9] bslib_0.2.5.1     utf8_1.2.2        R6_2.5.1          mgcv_1.8-36      
## [13] DBI_1.1.1         colorspace_2.0-3  withr_2.5.0       tidyselect_1.2.0 
## [17] gridExtra_2.3     prettyunits_1.1.1 processx_3.8.0    moments_0.14.1   
## [21] compiler_4.1.0    textshaping_0.3.5 cli_3.4.1         rvest_1.0.3      
## [25] xml2_1.3.3        desc_1.4.2        labeling_0.4.2    sass_0.4.2       
## [29] scales_1.2.1      callr_3.7.2       proxy_0.4-27      dtw_1.23-1       
## [33] systemfonts_1.0.4 stringr_1.5.0     digest_0.6.30     Sim.DiffProc_4.8 
## [37] rmarkdown_2.17    svglite_2.1.0     pkgconfig_2.0.3   htmltools_0.5.4  
## [41] sessioninfo_1.1.1 highr_0.9         fastmap_1.1.0     htmlwidgets_1.5.4
## [45] rlang_1.0.6       rstudioapi_0.14   farver_2.1.1      jquerylib_0.1.4  
## [49] generics_0.1.3    jsonlite_1.8.3    crosstalk_1.2.0   dplyr_1.0.10     
## [53] RCurl_1.98-1.9    magrittr_2.0.3    Matrix_1.5-1      Rcpp_1.0.9       
## [57] munsell_0.5.0     fansi_1.0.3       lifecycle_1.0.3   stringi_1.7.8    
## [61] yaml_2.3.6        MASS_7.3-54       pkgbuild_1.3.1    grid_4.1.0       
## [65] crayon_1.5.2      lattice_0.20-44   splines_4.1.0     ps_1.7.2         
## [69] pillar_1.8.1      igraph_1.3.5      rjson_0.2.21      fftw_1.0-7       
## [73] pkgload_1.2.1     glue_1.6.2        evaluate_0.18     remotes_2.4.0    
## [77] vctrs_0.5.1       testthat_3.0.4    gtable_0.3.1      purrr_0.3.4      
## [81] assertthat_0.2.1  cachem_1.0.6      xfun_0.35         ragg_1.1.3       
## [85] signal_0.7-7      tibble_3.1.8      memoise_2.0.1     ellipsis_0.3.2

Model training

vocal activity in A. lemur

Marcelo Araya-Salas, PhD & Fabiola Chirino

26-12-2022

Load packages

Downsample

Create stratified 5 minute cuts

Cheking manual selections

Create catalogs

Measure frequency range parameters

Template-based detection

Detection perfomance

Template fourier mean.PC1 threshold 0.43

Random forest results

Template fourier mean.PC1 threshold 0.38

Random forest results