## add 'developer/' to packages to be installed from github
x <- c("devtools", "maRce10/warbleR", "bioacoustics", "pbapply", "Rraven", "parallel", "viridis", "ggplot2", "knitr", "kableExtra", "maRce10/ohun", "DT", "formatR", "ranger")
aa <- lapply(x, function(y) {
# get pakage name
pkg <- strsplit(y, "/")[[1]]
pkg <- pkg[length(pkg)]
# check if installed, if not then install
if (!pkg %in% installed.packages()[,"Package"]) {
if (grepl("/", y)) devtools::install_github(y, force = TRUE) else
install.packages(y)
}
# load package
try(require(pkg, character.only = T), silent = T)
})
opts_knit$set(root.dir = "..")
iw <- info_wavs()
table(iw$sample.rate)
# fix_wavs(samp.rate = 10)
# path with waves wav_path <- '/Volumes/CHIRINO/Grabaciones SM4 2019/'
wav_path <- "/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/10_kHz_5_min_cuts/converted_sound_files/"
# list of wave files
wavs <- list.files(path = wav_path, pattern = "\\.wav$", ignore.case = TRUE, recursive = TRUE,
full.names = TRUE)
# data frame with waves and metadata
wav_df <- data.frame(path = dirname(wavs), file = basename(wavs))
wav_df$site <- ifelse(grepl("sukia", wav_df$file, ignore.case = TRUE), "Sukia", "Chimu")
sm_metadata <- seewave::songmeter(wav_df$file)
wav_df <- cbind(wav_df, sm_metadata)
head(wav_df)
str(wav_df)
par(mfrow = c(2, 1))
hist(wav_df$time[wav_df$site == "Sukia"], breaks = 100, col = adjustcolor("green",
alpha.f = 0.4), main = "Sukia")
hist(wav_df$time[wav_df$site == "Chimu"], breaks = 100, col = adjustcolor("blue",
alpha.f = 0.4), main = "Chimu")
wav_df <- wav_df[order(wav_df$time), ]
wav_df$day_count <- as.vector(round((wav_df$time - min(wav_df$time))/(3600 * 24),
0) + 1)
wav_df$week <- cut(x = wav_df$day_count, breaks = seq(1, max(wav_df$day_count) +
7, by = 7), include.lowest = TRUE)
wav_df <- droplevels(wav_df)
wav_df$period <- 4
wav_df$period[wav_df$hour %in% 17:19] <- 1
wav_df$period[wav_df$hour %in% 20:22] <- 2
wav_df$period[wav_df$hour %in% c(23, 0, 1)] <- 3
# create name combination
wav_df$site.week.period <- paste(wav_df$site, wav_df$week, wav_df$period, sep = "-")
# remove LAguna chimu
wav_df <- wav_df[grep("LAguna Chimu", wav_df$path, invert = TRUE), ]
set.seed(1)
sample_wav_l <- lapply(unique(wav_df$site.week.period), function(x) {
X <- wav_df[wav_df$site.week.period == x, ]
if (nrow(X) > 2)
X <- X[sample(1:nrow(X), size = 2), ]
return(X)
})
sample_wav_df <- do.call(rbind, sample_wav_l)
write.csv(sample_wav_df, file.path("/Volumes/CHIRINO/Proyecto Lemur/INV_2021/5_min_cut_lemur/",
"5_min_cut_metadata.csv"), row.names = FALSE)
sample_wav_df$cut.name <- gsub(".wav", "_5min_cut.wav", sample_wav_df$file)
library(pbapply)
out <- pblapply(1:nrow(sample_wav_df), function(x) {
# read first 5 mins
wv <- try(readWave(file.path(sample_wav_df$path[x], sample_wav_df$file[x]), from = 0,
to = 300, units = "seconds"), silent = TRUE)
if (class(wv) == "Wave")
# save cut
writeWave(wv, filename = file.path("/Volumes/CHIRINO/Proyecto Lemur/INV_2021/5_min_cut_lemur/",
sample_wav_df$cut.name[x]))
if (class(wv) == "Wave")
return(NULL) else return(x)
})
# list all selection tables
sl_tbs <- list.files("~/Dropbox/estudiantes/Fabiola/proyecto_lemur/data/processed/selection_tables/all_selec_tables_lemur/")
# remove empty selection table
sl_tbs <- sl_tbs[sl_tbs != "LAGCHIMU_20200919_201500.Table.2.selections.txt"]
# import all selections
sls <- imp_raven(path = "~/Dropbox/estudiantes/Fabiola/proyecto_lemur/data/processed/selection_tables/all_selec_tables_lemur/",
warbler.format = TRUE, all.data = TRUE, files = sl_tbs)
# relabel selec column
sls$selec <- 1:nrow(sls)
# check duration
sls$duration <- sls$end - sls$start
unique(sls$sound.files[sls$duration > 1])
# remove temporarily but must be fixed on the original individual selection
# tables
sls <- sls[sls$duration < 1, ]
# number of sound files and selection files should be the same
length(unique(sls$sound.files)) == length(unique(sls$selec.file))
# find files with more than 1 selection tab <-
# table(sls$sound.files[!duplicated(sls$selec.file)]) tab[tab > 1]
# unique(sls$selec.file[sls$sound.files ==
# 'LAGCHIMU_20190709_200000_5min_cut.wav'])
# check selections
cs <- check_sels(X = sls, pb = FALSE)
table(cs$check.res[cs$check.res != "OK"])
# those with errors were empty selections (single points)
sls <- sls[cs$check.res == "OK", ]
# check for duplicates (if any sound files was anottated more than once)
sls <- overlapping_sels(sls, indx.row = TRUE, pb = FALSE)
sls <- sls[!duplicated(sls$indx.row, incomparables = NA), ]
# should be 4 in 'SUKIA_20200207_220000_5min_cut.wav'
table(sls$sound.files[!is.na(sls$indx.row)])
# export raven multiple sound file selection table
exp_raven(sls, path = "./data/processed/selection_tables/", sound.file.path = .Options$warbleR$path,
file.name = "temporary_doublechecking_all_selections")
# fix path for Fabis laptop
fix_path(path = "./data/processed/selection_tables/", new.begin.path = "/Users/fabiolachirino/Dropbox/Fabiola/proyecto_lemur/data/raw/sound_files/10_kHz_5_min_cuts/",
sound.file.col = "Begin File")
# create sound file labels
sf.abbr <- gsub("0000_5min_cut.wav", "", sls$sound.files)
sf.abbr <- gsub("SUKIA_20", "S", sf.abbr)
sls$sf.abbr <- gsub("LAGCHIMU_20", "L", sf.abbr)
traing_dat <- imp_raven(path = "./data/processed/selection_tables/", files = "temporary_doublechecking_all_selections.txt")
train_files <- unique(traing_dat$`Begin File`)
fc <- file.copy(from = file.path("/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/10_kHz_5_min_cuts/",
train_files), to = file.path("/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/training_sound_files/",
train_files))
sls$tags <- "lemur"
exp_raven(sls[, c("sound.files", "selec", "start", "end", "bottom.freq", "top.freq",
"tags")], path = "/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/training_sound_files/",
sound.file.path = .Options$warbleR$path, file.name = "annotations_raven_format")
fix_path(path = "/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/training_sound_files/",
new.begin.path = "", sound.file.col = "Begin File")
fix_path(path = "/media/m/Expansion/Projects/Ongoing/Agalychnis_lemur/sound_files/training_sound_files/",
new.begin.path = "", sound.file.col = "Begin Path")
# create catalogs
catalog(X = sls, flim = c(1, 3.5), nrow = 12, ncol = 15, ovlp = 70, height = 15,
width = 23, same.time.scale = TRUE, mar = 0.005, wl = 512, gr = FALSE, spec.mar = 0.4,
lab.mar = 0.8, rm.axes = TRUE, by.row = TRUE, box = TRUE, labels = c("sf.abbr",
"selec"), fast.spec = TRUE, pal = viridis, parallel = 10)
# move images to figures folder
move_imgs(from = .Options$warbleR$path, to = "./figures", overwrite = TRUE)
# catalog for paper
catalog(X = sls[1:180, ], flim = c(1, 3.5), nrow = 12, ncol = 15, ovlp = 90, height = 15,
width = 23, same.time.scale = TRUE, mar = 0.1, wl = 512, gr = FALSE, spec.mar = 0,
lab.mar = 1e-04, rm.axes = TRUE, by.row = TRUE, box = FALSE, fast.spec = FALSE,
pal = viridis, parallel = 1, img.suffix = "no_margins", collevels = seq(-115,
0, 5))
# move images to figures folder
move_imgs(from = .Options$warbleR$path, to = "./figures", overwrite = TRUE)
To figure out variation in signal structure aiming to define templates
# measure spectrographic parameters
spectral_parameters <- spectro_analysis(sls, bp = "frange", fast = TRUE, ovlp = 70,
pb = FALSE)
saveRDS(spectral_parameters, "./data/processed/acoustic_parameters_and_selections.RDS")
spectral_parameters <- readRDS("./data/processed/acoustic_parameters_and_selections.RDS")
sls <- imp_raven(path = "./data/processed/selection_tables/", files = "temporary_doublechecking_all_selections.txt",
pb = FALSE, warbler.format = TRUE)
# measure signal_2_noise ratio sls <- signal_2_noise(sls, mar = 0.1, before =
# TRUE, pb = FALSE, bp = 'frange')
pca <- prcomp(spectral_parameters[, 2:27], scale. = TRUE)
# extrac SNR and peak freq
sls$meanpeakf <- spectral_parameters$meanpeakf
sls$duration <- spectral_parameters$duration
sls$sp.ent <- spectral_parameters$sp.ent
sls$PC1 <- pca$x[, 1]
# stck_parameters$PC1
stck_parameters <- stack(sls[, c("duration", "meanpeakf", "sp.ent", "PC1")])
ggplot(stck_parameters, aes(x = values)) + geom_histogram(fill = viridis(10, alpha = 0.9)[2]) +
facet_wrap(~ind, scales = "free")
spectral_parameters <- spectral_parameters[!is.infinite(spectral_parameters$SNR),
]
freq_variation <- sapply(c("duration", "meanpeakf", "sp.ent", "PC1"), function(x) data.frame(mean = mean(sls[,
x]), min = min(sls[, x]), max = max(sls[, x]), sd = sd(sls[, x]), CI.2.5 = quantile(sls[,
x], 0.025), CI.97.5 = quantile(sls[, x], 0.975)))
freq_variation <- do.call(rbind, lapply(data.frame(freq_variation), function(x) round(unlist(x),
3)))
# print table as kable
kb <- kable(freq_variation, row.names = TRUE, digits = 3)
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
print(kb)
mean | min | max | sd | CI.2.5 | CI.97.5 | |
---|---|---|---|---|---|---|
duration | 0.067 | 0.012 | 0.972 | 0.037 | 0.029 | 0.139 |
meanpeakf | 2.337 | 1.199 | 2.956 | 0.109 | 2.145 | 2.517 |
sp.ent | 0.873 | 0.623 | 0.993 | 0.055 | 0.756 | 0.960 |
PC1 | 0.000 | -21.798 | 11.555 | 3.066 | -5.660 | 4.920 |
# find template close to mean peak freq
mean_peak_indx <- which.min(abs(sls$meanpeakf - freq_variation[rownames(freq_variation) ==
"meanpeakf", 1]))
# find template close to mean duration
mean_duration_indx <- which.min(abs(sls$duration - freq_variation[rownames(freq_variation) ==
"duration", 1]))
# find template close to mean PC1
mean_pc1_indx <- which.min(abs(sls$PC1 - freq_variation[rownames(freq_variation) ==
"PC1", 1]))
# put both templates together
templates <- sls[c(mean_peak_indx, mean_duration_indx, mean_pc1_indx), ]
# label templates
templates$template.type <- c("mean.peak.freq", "mean.duration", "mean.PC1")
# create ext. selection table
templates_est <- selection_table(templates, extended = TRUE, confirm.extended = FALSE,
fix.selec = TRUE)
templates_est <- rename_est_waves(templates_est, new.sound.files = templates$template.type)
catalog(templates_est, nrow = 2, ncol = 2, ovlp = 70, height = 10, width = 14, same.time.scale = TRUE,
mar = 1, wl = 512, gr = FALSE, spec.mar = 0.6, lab.mar = 1, rm.axes = FALSE,
by.row = TRUE, box = TRUE, labels = "sound.files", fast.spec = FALSE, pal = viridis,
flim = c(1, 3.5), img.suffix = "templates", collevels = seq(-100, 0, 5))
te2 <- te <- templates_est[rep(2:4, 2), ]
# te2 <- rename_est_waves(te2, new.sound.files = letters[1:3])
te2$selec <- 2
te3 <- rbind(te, te2)
catalog(templates_est, nrow = 2, ncol = 3, ovlp = 90, height = 10, width = 14, same.time.scale = TRUE,
mar = 1, wl = 512, gr = FALSE, spec.mar = 0.6, lab.mar = 2, rm.axes = FALSE,
by.row = TRUE, box = FALSE, labels = "sound.files", fast.spec = FALSE, pal = viridis,
flim = c(1, 3.5), img.suffix = "templates", collevels = seq(-100, 0, 5), )
# # move images to figures folder
move_imgs(from = .Options$warbleR$path, to = "./figures", overwrite = TRUE)
# save est
saveRDS(templates_est, "./data/processed/mean_acoustic_parameter_templates_est.RDS")
Using 4 templates:
templates_est <- readRDS("./data/processed/mean_acoustic_parameter_templates_est.RDS")
paral <- 15
# use mean peak freq sels as template, fourier spectrograms
corr_templ <- template_correlator(templates = templates_est, files = unique(sls$sound.files),
path = .Options$warbleR$path, parallel = paral, hop.size = 11.6, ovlp = 70)
saveRDS(corr_templ, "./data/processed/template_correlations_4_templates.RDS")
corr_templ <- readRDS("./data/processed/template_correlations_4_templates.RDS")
optimize_fourier_detec <- optimize_template_detector(reference = sls, template.correlations = corr_templ,
threshold = seq(0.05, 0.5, 0.01), parallel = 1, by.sound.file = TRUE, pb = TRUE)
optimize_fourier_detec$recall <- optimize_fourier_detec$sensitivity
optimize_fourier_detec$precision <- optimize_fourier_detec$specificity
optimize_fourier_detec$sensitivity <- optimize_fourier_detec$specificity <- NULL
optimize_fourier_detec$f1.score <- optimize_fourier_detec$recall * optimize_fourier_detec$precision
saveRDS(optimize_fourier_detec, "./data/processed/optimization_results_4_templates.RDS")
optimize_fourier_detec <- readRDS("./data/processed/optimization_results_4_templates.RDS")
optimize_fourier_detec$templates <- gsub("-1", "", optimize_fourier_detec$templates)
optimize_fourier_detec$overlap.to.true.positives <- optimize_fourier_detec$proportional.overlap.true.positives
sd_tab <- summarize_diagnostic(optimize_fourier_detec)
saveRDS(sd_tab, "./data/processed/summary_optimization_results_4_templates.RDS")
sd_tab <- readRDS("./data/processed/optimization_results_4_templates.RDS")
sd_tab <- sd_tab[grep("SNR", sd_tab$templates, invert = TRUE), ]
# sd_tab$f1.score <- sd_tab$rec * sd_tab$specificity
agg_sd <- aggregate(cbind(recall, precision, f1.score) ~ threshold + templates, data = sd_tab,
mean)
ggplot(agg_sd, aes(x = threshold, y = recall, group = templates, color = templates)) +
geom_line() + geom_point() + scale_color_viridis_d(end = 1) + theme_classic()
ggplot(agg_sd, aes(x = threshold, y = precision, group = templates, color = templates)) +
geom_line() + geom_point() + scale_color_viridis_d(end = 0.8) + theme_classic()
ggplot(agg_sd, aes(y = precision, x = recall, group = templates, color = templates)) +
geom_line() + geom_point() + scale_color_viridis_d(end = 1) + theme_classic()
ggplot(agg_sd, aes(x = threshold, y = f1.score, group = templates, color = templates)) +
geom_line() + geom_point() + scale_color_viridis_d(end = 0.8) + theme_classic()
stck_agg_sd <- cbind(agg_sd[, 1:2], stack(agg_sd[, 3:5]))
stck_agg_sd$ind <- factor(stck_agg_sd$ind, labels = c("Recall", "Precision", "F1 score"))
# ggf1 <-
ggplot(stck_agg_sd, aes(x = threshold, y = values, group = templates, color = templates)) +
geom_line() + geom_point() + labs(x = "Correlation threshold", color = "Template",
y = "") + facet_wrap(~ind, nrow = 1, scales = "free_y") + scale_color_viridis_d(end = 0.8,
labels = c("Mean duration", "Mean PC1", "Mean peak frequency"), alpha = 0.5) +
theme_classic()
ggsave("./data/processed/multipanel_perfomance_indices.jpeg", width = 9, height = 4,
dpi = 300)
# print dynamic table
oa_DT <- datatable(sd_tab, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(sd_tab, is.numeric), 3)
Choose ‘fourier mean.PC1’ template with a threshold of 0.43 due to a good sensitivity and relatively low number of false positives
templates_est <- readRDS("./data/processed/mean_acoustic_parameter_templates_est.RDS")
corr_templ <- template_correlator(templates = templates_est[templates_est$sound.files ==
"mean.PC1", , drop = FALSE], files = unique(sls$sound.files), path = .Options$warbleR$path,
parallel = 10, hop.size = 11.6, ovlp = 70)
detec <- template_detector(template.correlations = corr_templ, threshold = 0.43,
parallel = 10, pb = TRUE)
diagnostic <- diagnose_detection(reference = sls, detection = detec, parallel = 10,
by.sound.file = TRUE)
summarize_diagnostic(diagnostic)
saveRDS(diagnostic, "./data/processed/diagnostic_mean_pc1_0.43_threshold_diagnostic.RDS")
lab_detec <- label_detection(reference = sls, detection = detec, parallel = 10)
saveRDS(lab_detec, "./data/processed/label_detection_mean_pc1_0.43_threshold.RDS")
lab_detec <- readRDS("./data/processed/label_detection_mean_pc1_0.43_threshold.RDS")
filter_lab_detec <- filter_detection(detection = lab_detec, by = "scores")
saveRDS(filter_lab_detec, "./data/processed/filtered_detection_mean_pc1_0.43_threshold.RDS")
diagnose_model <- diagnose_detection(reference = sls, filter_lab_detec, parallel = 10)
diagnose_model$f1.score <- diagnose_model$specificity * diagnose_model$sensitivity
saveRDS(diagnose_model, "./data/processed/diagnostics_detection_mean_pc1_0.43_threshold.RDS")
diag <- readRDS("./data/processed/diagnostics_detection_mean_pc1_0.43_threshold.RDS")
# print dynamic table
oa_DT <- datatable(diag, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(diag, is.numeric), 3)
filter_lab_detec <- readRDS("./data/processed/filtered_detection_mean_pc1_0.43_threshold.RDS")
# measure spectrographic parameters
spectral_parameters <- spectro_analysis(filter_lab_detec, bp = c(1, 3.5), fast = TRUE,
ovlp = 70, parallel = 10)
# mfccs <- mfcc_stats(X = lab_detec, bp = c(1, 3.5), ovlp = 70, parallel = 10)
# na_rows <- unique(unlist(sapply(mfccs, function(x) which(is.na(x)))))
# lab_detec <- lab_detec[-na_rows, ] spectral_parameters <-
# spectral_parameters[-na_rows, ] mfccs <- mfccs[-na_rows, ]
spectral_parameters$class <- filter_lab_detec$detection.class
# spectral_parameters <- data.frame(spectral_parameters, mfccs[,
# !names(spectral_parameters) %in% c('sound.files', 'selec')])
spectral_parameters$class[spectral_parameters$class != "false.positive"] <- "true.positive"
# make it a factor for ranger to work
spectral_parameters$class <- as.factor(spectral_parameters$class)
# run RF model spectral and cepstral parameters
rfm <- ranger(class ~ ., data = spectral_parameters[, !names(spectral_parameters) %in%
c("sound.files", "selec")], num.trees = 10000, importance = "impurity", seed = 10)
saveRDS(list(rfm = rfm, spectral_parameters = spectral_parameters, filter_lab_detec = filter_lab_detec),
"./data/processed/data_and_model_random_forest_0.43_threshold_only_spectro_parameters.RDS")
# attach(readRDS('./data/processed/data_and_model_random_forest_0.43_threshold.RDS'))
attach(readRDS("./data/processed/data_and_model_random_forest_0.43_threshold_only_spectro_parameters.RDS"))
rfm
## Ranger result
##
## Call:
## ranger(class ~ ., data = spectral_parameters[, !names(spectral_parameters) %in% c("sound.files", "selec")], num.trees = 10000, importance = "impurity", seed = 10)
##
## Type: Classification
## Number of trees: 10000
## Sample size: 89803
## Number of independent variables: 26
## Mtry: 5
## Target node size: 1
## Variable importance mode: impurity
## Splitrule: gini
## OOB prediction error: 0.88 %
Diagnostic after random forest classification:
filter_lab_detec$pred.class <- rfm$predictions
positive_detec <- filter_lab_detec[filter_lab_detec$pred.class == "true.positive",
]
# save for trying stats write.csv(positive_detec,
# './data/processed/true_positive_detections_on_training_recordings.csv',
# row.names = FALSE)
# exp_raven(positive_detec, file.name =
# './data/processed/true_positive_detections_on_training_recordings',
# sound.file.path = .Options$warbleR$path)
# fix path for Fabis laptop fix_path(path = './data/processed/',new.begin.path
# =
# '/Users/fabiolachirino/Dropbox/Fabiola/proyecto_lemur/data/raw/sound_files/10_kHz_5_min_cuts/',
# sound.file.col = 'Begin File')
temp_detec <- positive_detec
temp_detec$detection.class <- "true.positive"
diag <- diagnose_detection(reference = sls, detection = temp_detec, pb = FALSE)
# print dynamic table
oa_DT <- datatable(diag, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(diag, is.numeric), 3)
Black line = 1:1 gray line = model slope
obs_count <- tapply(sls$sound.files, sls$sound.files, length)
pred_count <- tapply(positive_detec$sound.files, positive_detec$sound.files, length)
int_columns <- intersect(names(obs_count), names(pred_count))
obs_count <- obs_count[names(obs_count) %in% int_columns]
pred_count <- pred_count[names(pred_count) %in% int_columns]
pred_count <- pred_count[order(names(pred_count))]
obs_count <- obs_count[order(names(obs_count))]
df <- data.frame(sound.files = names(obs_count), observed = obs_count, predicted = pred_count)
ggplot(df, aes(x = observed, y = predicted)) + geom_point(color = viridis(10, alpha = 0.4)[2],
size = 3) + geom_abline(slope = 1, intercept = 0) + annotate("text", x = 50,
y = 150, label = paste("r =", round(cor(obs_count, pred_count), 3)), size = 8) +
geom_smooth(method = "lm", se = FALSE, col = "gray") + theme_classic(base_size = 20)
(lm(pred_count ~ obs_count))
##
## Call:
## lm(formula = pred_count ~ obs_count)
##
## Coefficients:
## (Intercept) obs_count
## -0.970 0.822
Removing outlier
ggplot(df[df$observed < 100, ], aes(x = observed, y = predicted)) + geom_point(color = viridis(10,
alpha = 0.4)[2], size = 3) + geom_abline(slope = 1, intercept = 0) + annotate("text",
x = 50, y = 150, label = paste("r =", round(cor(obs_count[obs_count < 100], pred_count[obs_count <
100]), 3)), size = 8) + geom_smooth(method = "lm", se = FALSE, col = "gray") +
theme_classic(base_size = 20)
(lm(pred_count[obs_count < 100] ~ obs_count[obs_count < 100]))
##
## Call:
## lm(formula = pred_count[obs_count < 100] ~ obs_count[obs_count <
## 100])
##
## Coefficients:
## (Intercept) obs_count[obs_count < 100]
## 0.0145 0.7613
Choose ‘fourier mean.PC1’ template with a threshold of 0.43 due to a good sensitivity and relatively low number of false positives
templates_est <- readRDS("./data/processed/mean_acoustic_parameter_templates_est.RDS")
corr_templ <- template_correlator(templates = templates_est[templates_est$sound.files ==
"mean.PC1", , drop = FALSE], files = unique(sls$sound.files), path = .Options$warbleR$path,
parallel = 10, hop.size = 11.6, ovlp = 70)
detec <- template_detector(template.correlations = corr_templ, threshold = 0.38,
parallel = 10, pb = TRUE)
diagnostic <- diagnose_detection(reference = sls, detection = detec, parallel = 10,
by.sound.file = TRUE)
summarize_diagnostic(diagnostic)
saveRDS(diagnostic, "./data/processed/diagnostic_mean_pc1_0.38_threshold_diagnostic.RDS")
lab_detec <- label_detection(reference = sls, detection = detec, parallel = 10)
saveRDS(lab_detec, "./data/processed/label_detection_mean_pc1_0.38_threshold.RDS")
lab_detec <- readRDS("./data/processed/label_detection_mean_pc1_0.38_threshold.RDS")
filter_lab_detec <- filter_detection(lab_detec, parallel = 10)
saveRDS(filter_lab_detec, "./data/processed/filtered_detection_mean_pc1_0.38_threshold.RDS")
diagnose_model <- diagnose_detection(reference = sls, filter_lab_detec, parallel = 10)
saveRDS(diagnose_model, "./data/processed/diagnostics_detection_mean_pc1_0.38_threshold.RDS")
diag <- readRDS("./data/processed/diagnostics_detection_mean_pc1_0.38_threshold.RDS")
# print dynamic table
oa_DT <- datatable(diag, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(diag, is.numeric), 3)
lab_detec <- readRDS("./data/processed/label_detection_mean_pc1_0.38_threshold.RDS")
# measure spectrographic parameters
spectral_parameters <- spectro_analysis(lab_detec, bp = c(1, 3.5), fast = TRUE, ovlp = 70,
parallel = 10)
# mfccs <- mfcc_stats(X = lab_detec, bp = c(1, 3.5), ovlp = 70, parallel = 10)
# na_rows <- unique(unlist(sapply(mfccs, function(x) which(is.na(x)))))
# lab_detec <- lab_detec[-na_rows, ] spectral_parameters <-
# spectral_parameters[-na_rows, ] mfccs <- mfccs[-na_rows, ]
spectral_parameters$class <- lab_detec$detection.class
# spectral_parameters <- data.frame(spectral_parameters, mfccs[,
# !names(spectral_parameters) %in% c('sound.files', 'selec')])
spectral_parameters$class[spectral_parameters$class != "false.positive"] <- "true.positive"
# make it a factor for ranger to work
spectral_parameters$class <- as.factor(spectral_parameters$class)
# run RF model spectral and cepstral parameters
rfm <- ranger(class ~ ., data = spectral_parameters[, !names(spectral_parameters) %in%
c("sound.files", "selec")], num.trees = 10000, importance = "impurity", seed = 10)
saveRDS(list(rfm = rfm, spectral_parameters = spectral_parameters, lab_detec_0.38 = lab_detec),
"./data/processed/data_and_model_random_forest_0.38_threshold_only_spectro_parameters.RDS")
attach(readRDS("./data/processed/data_and_model_random_forest_0.38_threshold_only_spectro_parameters.RDS"))
rfm
## Ranger result
##
## Call:
## ranger(class ~ ., data = spectral_parameters[, !names(spectral_parameters) %in% c("sound.files", "selec")], num.trees = 10000, importance = "impurity", seed = 10)
##
## Type: Classification
## Number of trees: 10000
## Sample size: 215415
## Number of independent variables: 26
## Mtry: 5
## Target node size: 1
## Variable importance mode: impurity
## Splitrule: gini
## OOB prediction error: 1.08 %
Diagnostic after random forest classification:
lab_detec <- lab_detec_0.38
# table(lab_detec$detection.class)
lab_detec$pred.class <- rfm$predictions
positive_detec <- lab_detec[lab_detec$pred.class == "true.positive", ]
temp_detec <- positive_detec
temp_detec$detection.class <- "true.positive"
diag <- diagnose_detection(reference = sls, detection = temp_detec, pb = FALSE)
# print dynamic table
oa_DT <- datatable(diag, editable = list(target = "row"), rownames = FALSE, style = "bootstrap",
filter = "top", options = list(pageLength = 100, autoWidth = TRUE, dom = "ft"),
autoHideNavigation = TRUE, escape = FALSE)
formatRound(table = oa_DT, columns = sapply(diag, is.numeric), 3)
Black line = 1:1 gray line = model slope
obs_count <- tapply(sls$sound.files, sls$sound.files, length)
pred_count <- tapply(positive_detec$sound.files, positive_detec$sound.files, length)
int_columns <- intersect(names(obs_count), names(pred_count))
obs_count <- obs_count[names(obs_count) %in% int_columns]
pred_count <- pred_count[names(pred_count) %in% int_columns]
pred_count <- pred_count[order(names(pred_count))]
obs_count <- obs_count[order(names(obs_count))]
df <- data.frame(sound.files = names(obs_count), observed = obs_count, predicted = pred_count)
ggplot(df, aes(x = observed, y = predicted)) + geom_point(color = viridis(10, alpha = 0.4)[2],
size = 3) + geom_abline(slope = 1, intercept = 0) + annotate("text", x = 50,
y = 150, label = paste("r =", round(cor(obs_count, pred_count), 3)), size = 8) +
geom_smooth(method = "lm", se = FALSE, col = "gray") + theme_classic(base_size = 20)
(lm(pred_count ~ obs_count))
##
## Call:
## lm(formula = pred_count ~ obs_count)
##
## Coefficients:
## (Intercept) obs_count
## -3.17 1.45
Removing outlier
ggplot(df[df$observed < 100, ], aes(x = observed, y = predicted)) + geom_point(color = viridis(10,
alpha = 0.4)[2], size = 3) + geom_abline(slope = 1, intercept = 0) + annotate("text",
x = 50, y = 150, label = paste("r =", round(cor(obs_count[obs_count < 100], pred_count[obs_count <
100]), 3)), size = 8) + geom_smooth(method = "lm", se = FALSE, col = "gray") +
theme_classic(base_size = 20)
(lm(pred_count[obs_count < 100] ~ obs_count[obs_count < 100]))
##
## Call:
## lm(formula = pred_count[obs_count < 100] ~ obs_count[obs_count <
## 100])
##
## Coefficients:
## (Intercept) obs_count[obs_count < 100]
## -0.801 1.304
Session information
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ranger_0.13.1 formatR_1.11 DT_0.26 ohun_0.1.0
## [5] kableExtra_1.3.4 ggplot2_3.4.0 viridis_0.6.2 viridisLite_0.4.1
## [9] Rraven_1.0.13 pbapply_1.6-0 bioacoustics_0.2.8 warbleR_1.1.28
## [13] NatureSounds_1.0.4 knitr_1.41 seewave_2.2.0 tuneR_1.4.1
## [17] devtools_2.4.2 usethis_2.0.1
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-152 bitops_1.0-7 fs_1.5.2 webshot_0.5.4
## [5] httr_1.4.4 rprojroot_2.0.3 Deriv_4.1.3 tools_4.1.0
## [9] bslib_0.2.5.1 utf8_1.2.2 R6_2.5.1 mgcv_1.8-36
## [13] DBI_1.1.1 colorspace_2.0-3 withr_2.5.0 tidyselect_1.2.0
## [17] gridExtra_2.3 prettyunits_1.1.1 processx_3.8.0 moments_0.14.1
## [21] compiler_4.1.0 textshaping_0.3.5 cli_3.4.1 rvest_1.0.3
## [25] xml2_1.3.3 desc_1.4.2 labeling_0.4.2 sass_0.4.2
## [29] scales_1.2.1 callr_3.7.2 proxy_0.4-27 dtw_1.23-1
## [33] systemfonts_1.0.4 stringr_1.5.0 digest_0.6.30 Sim.DiffProc_4.8
## [37] rmarkdown_2.17 svglite_2.1.0 pkgconfig_2.0.3 htmltools_0.5.4
## [41] sessioninfo_1.1.1 highr_0.9 fastmap_1.1.0 htmlwidgets_1.5.4
## [45] rlang_1.0.6 rstudioapi_0.14 farver_2.1.1 jquerylib_0.1.4
## [49] generics_0.1.3 jsonlite_1.8.3 crosstalk_1.2.0 dplyr_1.0.10
## [53] RCurl_1.98-1.9 magrittr_2.0.3 Matrix_1.5-1 Rcpp_1.0.9
## [57] munsell_0.5.0 fansi_1.0.3 lifecycle_1.0.3 stringi_1.7.8
## [61] yaml_2.3.6 MASS_7.3-54 pkgbuild_1.3.1 grid_4.1.0
## [65] crayon_1.5.2 lattice_0.20-44 splines_4.1.0 ps_1.7.2
## [69] pillar_1.8.1 igraph_1.3.5 rjson_0.2.21 fftw_1.0-7
## [73] pkgload_1.2.1 glue_1.6.2 evaluate_0.18 remotes_2.4.0
## [77] vctrs_0.5.1 testthat_3.0.4 gtable_0.3.1 purrr_0.3.4
## [81] assertthat_0.2.1 cachem_1.0.6 xfun_0.35 ragg_1.1.3
## [85] signal_0.7-7 tibble_3.1.8 memoise_2.0.1 ellipsis_0.3.2